mirror of
https://github.com/openclaw/openclaw.git
synced 2026-05-21 12:04:59 +00:00
task continuity: TASKS.md ledger, post-compaction recovery, entity dedup, credential scanning
Add task ledger (TASKS.md) parsing and stale-task archival for maintaining agent task state across context compactions. Post-compaction recovery injects memory_recall + TASKS.md read steps after auto-compaction. Sleep cycle gains entity dedup (Phase 1d) and credential scanning. Memory flush now extracts active task checkpoints. Compaction instructions prioritize active tasks. Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -292,6 +292,7 @@ export function registerCli(api: OpenClawPluginApi, deps: CliDeps): void {
|
||||
"--skip-semantic",
|
||||
"Skip LLM-based semantic dedup (Phase 1b) and conflict detection (Phase 1c)",
|
||||
)
|
||||
.option("--workspace <dir>", "Workspace directory for TASKS.md cleanup")
|
||||
.action(
|
||||
async (opts: {
|
||||
agent?: string;
|
||||
@@ -305,23 +306,31 @@ export function registerCli(api: OpenClawPluginApi, deps: CliDeps): void {
|
||||
maxSemanticPairs?: string;
|
||||
concurrency?: string;
|
||||
skipSemantic?: boolean;
|
||||
workspace?: string;
|
||||
}) => {
|
||||
console.log("\n🌙 Memory Sleep Cycle");
|
||||
console.log("═════════════════════════════════════════════════════════════");
|
||||
console.log("Seven-phase memory consolidation (Pareto-based):\n");
|
||||
console.log(" Phase 1: Deduplication — Merge near-duplicate memories");
|
||||
console.log("Multi-phase memory consolidation (Pareto-based):\n");
|
||||
console.log(" Phase 1: Deduplication — Merge near-duplicate memories");
|
||||
console.log(
|
||||
" Phase 1b: Semantic Dedup — LLM-based paraphrase detection (0.75–0.95 band)",
|
||||
" Phase 1b: Semantic Dedup — LLM-based paraphrase detection (0.75–0.95 band)",
|
||||
);
|
||||
console.log(" Phase 1c: Conflict Detection — Resolve contradictory memories");
|
||||
console.log(" Phase 1c: Conflict Detection — Resolve contradictory memories");
|
||||
console.log(" Phase 1d: Entity Dedup — Merge duplicate entity nodes");
|
||||
console.log(
|
||||
" Phase 2: Pareto Scoring — Calculate effective scores for all memories",
|
||||
" Phase 2: Pareto Scoring — Calculate effective scores for all memories",
|
||||
);
|
||||
console.log(" Phase 3: Core Promotion — Regular memories above threshold → core");
|
||||
console.log(" Phase 4: Core Demotion — Core memories below threshold → regular");
|
||||
console.log(" Phase 5: Extraction — Extract entities and categorize");
|
||||
console.log(" Phase 6: Decay & Pruning — Remove stale low-importance memories");
|
||||
console.log(" Phase 7: Orphan Cleanup — Remove disconnected nodes\n");
|
||||
console.log(
|
||||
" Phase 3: Core Promotion — Regular memories above threshold → core",
|
||||
);
|
||||
console.log(
|
||||
" Phase 4: Core Demotion — Core memories below threshold → regular",
|
||||
);
|
||||
console.log(" Phase 5: Extraction — Extract entities and categorize");
|
||||
console.log(" Phase 6: Decay & Pruning — Remove stale low-importance memories");
|
||||
console.log(" Phase 7: Orphan Cleanup — Remove disconnected nodes");
|
||||
console.log(" Phase 7b: Credential Scan — Remove memories with leaked secrets");
|
||||
console.log(" Phase 8: Task Ledger Cleanup — Archive stale tasks in TASKS.md\n");
|
||||
|
||||
try {
|
||||
// Validate sleep cycle CLI parameters before running
|
||||
@@ -396,6 +405,9 @@ export function registerCli(api: OpenClawPluginApi, deps: CliDeps): void {
|
||||
|
||||
await db.ensureInitialized();
|
||||
|
||||
// Resolve workspace dir for task ledger cleanup
|
||||
const resolvedWorkspace = opts.workspace?.trim() || undefined;
|
||||
|
||||
const result = await runSleepCycle(db, embeddings, extractionConfig, api.logger, {
|
||||
agentId: opts.agent,
|
||||
dedupThreshold: opts.dedupThreshold ? parseFloat(opts.dedupThreshold) : undefined,
|
||||
@@ -409,18 +421,23 @@ export function registerCli(api: OpenClawPluginApi, deps: CliDeps): void {
|
||||
decayCurves: Object.keys(cfg.decayCurves).length > 0 ? cfg.decayCurves : undefined,
|
||||
extractionBatchSize: batchSize,
|
||||
extractionDelayMs: delay,
|
||||
workspaceDir: resolvedWorkspace,
|
||||
onPhaseStart: (phase) => {
|
||||
const phaseNames: Record<string, string> = {
|
||||
dedup: "Phase 1: Deduplication",
|
||||
semanticDedup: "Phase 1b: Semantic Deduplication",
|
||||
conflict: "Phase 1c: Conflict Detection",
|
||||
entityDedup: "Phase 1d: Entity Deduplication",
|
||||
pareto: "Phase 2: Pareto Scoring",
|
||||
promotion: "Phase 3: Core Promotion",
|
||||
extraction: "Phase 4: Extraction",
|
||||
decay: "Phase 5: Decay & Pruning",
|
||||
cleanup: "Phase 6: Orphan Cleanup",
|
||||
noiseCleanup: "Phase 7: Noise Cleanup",
|
||||
credentialScan: "Phase 7b: Credential Scan",
|
||||
taskLedger: "Phase 8: Task Ledger Cleanup",
|
||||
};
|
||||
console.log(`\n▶ ${phaseNames[phase]}`);
|
||||
console.log(`\n▶ ${phaseNames[phase] ?? phase}`);
|
||||
console.log("─────────────────────────────────────────────────────────────");
|
||||
},
|
||||
onProgress: (_phase, message) => {
|
||||
@@ -457,6 +474,12 @@ export function registerCli(api: OpenClawPluginApi, deps: CliDeps): void {
|
||||
console.log(
|
||||
` Cleanup: ${result.cleanup.entitiesRemoved} entities, ${result.cleanup.tagsRemoved} tags removed`,
|
||||
);
|
||||
console.log(
|
||||
` Task Ledger: ${result.taskLedger.archivedCount} stale tasks archived` +
|
||||
(result.taskLedger.archivedIds.length > 0
|
||||
? ` (${result.taskLedger.archivedIds.join(", ")})`
|
||||
: ""),
|
||||
);
|
||||
if (result.aborted) {
|
||||
console.log("\n⚠️ Sleep cycle was aborted before completion.");
|
||||
}
|
||||
|
||||
327
extensions/memory-neo4j/neo4j-client.entity-dedup.test.ts
Normal file
327
extensions/memory-neo4j/neo4j-client.entity-dedup.test.ts
Normal file
@@ -0,0 +1,327 @@
|
||||
/**
|
||||
* Tests for entity deduplication in neo4j-client.ts.
|
||||
*
|
||||
* Tests findDuplicateEntityPairs() and mergeEntityPair() using mocked Neo4j driver.
|
||||
* Verifies substring-matching logic, mention-count based decisions, and merge behavior.
|
||||
*/
|
||||
|
||||
import { describe, it, expect, vi, beforeEach } from "vitest";
|
||||
import { Neo4jMemoryClient } from "./neo4j-client.js";
|
||||
|
||||
// ============================================================================
|
||||
// Test Helpers
|
||||
// ============================================================================
|
||||
|
||||
function createMockSession() {
|
||||
return {
|
||||
run: vi.fn().mockResolvedValue({ records: [] }),
|
||||
close: vi.fn().mockResolvedValue(undefined),
|
||||
executeWrite: vi.fn(
|
||||
async (work: (tx: { run: ReturnType<typeof vi.fn> }) => Promise<unknown>) => {
|
||||
const mockTx = { run: vi.fn().mockResolvedValue({ records: [] }) };
|
||||
return work(mockTx);
|
||||
},
|
||||
),
|
||||
};
|
||||
}
|
||||
|
||||
function createMockDriver() {
|
||||
return {
|
||||
session: vi.fn().mockReturnValue(createMockSession()),
|
||||
close: vi.fn().mockResolvedValue(undefined),
|
||||
};
|
||||
}
|
||||
|
||||
function createMockLogger() {
|
||||
return {
|
||||
info: vi.fn(),
|
||||
warn: vi.fn(),
|
||||
error: vi.fn(),
|
||||
debug: vi.fn(),
|
||||
};
|
||||
}
|
||||
|
||||
function mockRecord(data: Record<string, unknown>) {
|
||||
return {
|
||||
get: (key: string) => data[key],
|
||||
};
|
||||
}
|
||||
|
||||
// ============================================================================
|
||||
// Entity Deduplication Tests
|
||||
// ============================================================================
|
||||
|
||||
describe("Entity Deduplication", () => {
|
||||
let client: Neo4jMemoryClient;
|
||||
let mockDriver: ReturnType<typeof createMockDriver>;
|
||||
let mockSession: ReturnType<typeof createMockSession>;
|
||||
let mockLogger: ReturnType<typeof createMockLogger>;
|
||||
|
||||
beforeEach(() => {
|
||||
mockLogger = createMockLogger();
|
||||
mockDriver = createMockDriver();
|
||||
mockSession = createMockSession();
|
||||
mockDriver.session.mockReturnValue(mockSession);
|
||||
|
||||
client = new Neo4jMemoryClient("bolt://localhost:7687", "neo4j", "password", 1024, mockLogger);
|
||||
(client as any).driver = mockDriver;
|
||||
(client as any).indexesReady = true;
|
||||
});
|
||||
|
||||
// --------------------------------------------------------------------------
|
||||
// findDuplicateEntityPairs()
|
||||
// --------------------------------------------------------------------------
|
||||
|
||||
describe("findDuplicateEntityPairs", () => {
|
||||
it("finds substring matches: 'tarun' + 'tarun sukhani' (same type)", async () => {
|
||||
mockSession.run.mockResolvedValueOnce({
|
||||
records: [
|
||||
mockRecord({
|
||||
id1: "e1",
|
||||
name1: "tarun",
|
||||
mc1: 5,
|
||||
id2: "e2",
|
||||
name2: "tarun sukhani",
|
||||
mc2: 3,
|
||||
}),
|
||||
],
|
||||
});
|
||||
|
||||
const pairs = await client.findDuplicateEntityPairs();
|
||||
|
||||
expect(pairs).toHaveLength(1);
|
||||
// "tarun" has more mentions (5 > 3), so it should be kept
|
||||
expect(pairs[0].keepId).toBe("e1");
|
||||
expect(pairs[0].keepName).toBe("tarun");
|
||||
expect(pairs[0].removeId).toBe("e2");
|
||||
expect(pairs[0].removeName).toBe("tarun sukhani");
|
||||
});
|
||||
|
||||
it("keeps entity with more mentions regardless of name length", async () => {
|
||||
mockSession.run.mockResolvedValueOnce({
|
||||
records: [
|
||||
mockRecord({
|
||||
id1: "e1",
|
||||
name1: "fish speech",
|
||||
mc1: 2,
|
||||
id2: "e2",
|
||||
name2: "fish speech s1 mini",
|
||||
mc2: 10,
|
||||
}),
|
||||
],
|
||||
});
|
||||
|
||||
const pairs = await client.findDuplicateEntityPairs();
|
||||
|
||||
expect(pairs).toHaveLength(1);
|
||||
// "fish speech s1 mini" has more mentions (10 > 2), so it should be kept
|
||||
expect(pairs[0].keepId).toBe("e2");
|
||||
expect(pairs[0].keepName).toBe("fish speech s1 mini");
|
||||
expect(pairs[0].removeId).toBe("e1");
|
||||
expect(pairs[0].removeName).toBe("fish speech");
|
||||
});
|
||||
|
||||
it("keeps shorter name when mentions are equal", async () => {
|
||||
mockSession.run.mockResolvedValueOnce({
|
||||
records: [
|
||||
mockRecord({
|
||||
id1: "e1",
|
||||
name1: "aaditya",
|
||||
mc1: 5,
|
||||
id2: "e2",
|
||||
name2: "aaditya sukhani",
|
||||
mc2: 5,
|
||||
}),
|
||||
],
|
||||
});
|
||||
|
||||
const pairs = await client.findDuplicateEntityPairs();
|
||||
|
||||
expect(pairs).toHaveLength(1);
|
||||
// Equal mentions, so keep the shorter name ("aaditya")
|
||||
expect(pairs[0].keepId).toBe("e1");
|
||||
expect(pairs[0].keepName).toBe("aaditya");
|
||||
expect(pairs[0].removeId).toBe("e2");
|
||||
expect(pairs[0].removeName).toBe("aaditya sukhani");
|
||||
});
|
||||
|
||||
it("returns empty array when no duplicates exist", async () => {
|
||||
mockSession.run.mockResolvedValueOnce({ records: [] });
|
||||
|
||||
const pairs = await client.findDuplicateEntityPairs();
|
||||
|
||||
expect(pairs).toHaveLength(0);
|
||||
});
|
||||
|
||||
it("handles multiple duplicate pairs", async () => {
|
||||
mockSession.run.mockResolvedValueOnce({
|
||||
records: [
|
||||
mockRecord({
|
||||
id1: "e1",
|
||||
name1: "tarun",
|
||||
mc1: 5,
|
||||
id2: "e2",
|
||||
name2: "tarun sukhani",
|
||||
mc2: 3,
|
||||
}),
|
||||
mockRecord({
|
||||
id1: "e3",
|
||||
name1: "fish speech",
|
||||
mc1: 2,
|
||||
id2: "e4",
|
||||
name2: "fish speech s1 mini",
|
||||
mc2: 8,
|
||||
}),
|
||||
],
|
||||
});
|
||||
|
||||
const pairs = await client.findDuplicateEntityPairs();
|
||||
|
||||
expect(pairs).toHaveLength(2);
|
||||
});
|
||||
|
||||
it("handles NULL mention counts (treats as 0)", async () => {
|
||||
mockSession.run.mockResolvedValueOnce({
|
||||
records: [
|
||||
mockRecord({
|
||||
id1: "e1",
|
||||
name1: "neo4j",
|
||||
mc1: null,
|
||||
id2: "e2",
|
||||
name2: "neo4j database",
|
||||
mc2: null,
|
||||
}),
|
||||
],
|
||||
});
|
||||
|
||||
const pairs = await client.findDuplicateEntityPairs();
|
||||
|
||||
expect(pairs).toHaveLength(1);
|
||||
// Both NULL (treated as 0), so keep the shorter name
|
||||
expect(pairs[0].keepId).toBe("e1");
|
||||
expect(pairs[0].keepName).toBe("neo4j");
|
||||
});
|
||||
|
||||
it("passes the Cypher query with substring matching and type constraint", async () => {
|
||||
mockSession.run.mockResolvedValueOnce({ records: [] });
|
||||
|
||||
await client.findDuplicateEntityPairs();
|
||||
|
||||
const query = mockSession.run.mock.calls[0][0] as string;
|
||||
// Verify the query checks same type
|
||||
expect(query).toContain("e1.type = e2.type");
|
||||
// Verify the query checks CONTAINS in both directions
|
||||
expect(query).toContain("e1.name CONTAINS e2.name");
|
||||
expect(query).toContain("e2.name CONTAINS e1.name");
|
||||
// Verify minimum name length filter
|
||||
expect(query).toContain("size(e1.name) > 2");
|
||||
});
|
||||
});
|
||||
|
||||
// --------------------------------------------------------------------------
|
||||
// mergeEntityPair()
|
||||
// --------------------------------------------------------------------------
|
||||
|
||||
describe("mergeEntityPair", () => {
|
||||
it("transfers MENTIONS and deletes source entity", async () => {
|
||||
// mergeEntityPair uses executeWrite, so we need to set up the mock transaction
|
||||
const mockTx = {
|
||||
run: vi
|
||||
.fn()
|
||||
.mockResolvedValueOnce({
|
||||
// Transfer MENTIONS
|
||||
records: [mockRecord({ transferred: 3 })],
|
||||
})
|
||||
.mockResolvedValueOnce({
|
||||
// Update mentionCount
|
||||
records: [],
|
||||
})
|
||||
.mockResolvedValueOnce({
|
||||
// Delete removed entity
|
||||
records: [],
|
||||
}),
|
||||
};
|
||||
|
||||
mockSession.executeWrite.mockImplementationOnce(async (work: any) => work(mockTx));
|
||||
|
||||
const result = await client.mergeEntityPair("keep-id", "remove-id");
|
||||
|
||||
expect(result).toBe(true);
|
||||
// Should have been called 3 times: transfer, update count, delete
|
||||
expect(mockTx.run).toHaveBeenCalledTimes(3);
|
||||
|
||||
// Verify transfer query
|
||||
const transferQuery = mockTx.run.mock.calls[0][0] as string;
|
||||
expect(transferQuery).toContain("MERGE (m)-[:MENTIONS]->(keep)");
|
||||
expect(transferQuery).toContain("DELETE r");
|
||||
|
||||
// Verify update mentionCount
|
||||
const updateQuery = mockTx.run.mock.calls[1][0] as string;
|
||||
expect(updateQuery).toContain("mentionCount");
|
||||
|
||||
// Verify delete query
|
||||
const deleteQuery = mockTx.run.mock.calls[2][0] as string;
|
||||
expect(deleteQuery).toContain("DETACH DELETE e");
|
||||
});
|
||||
|
||||
it("skips mentionCount update when no relationships to transfer", async () => {
|
||||
const mockTx = {
|
||||
run: vi
|
||||
.fn()
|
||||
.mockResolvedValueOnce({
|
||||
// Transfer MENTIONS — 0 transferred
|
||||
records: [mockRecord({ transferred: 0 })],
|
||||
})
|
||||
.mockResolvedValueOnce({
|
||||
// Delete removed entity (mentionCount update is skipped)
|
||||
records: [],
|
||||
}),
|
||||
};
|
||||
|
||||
mockSession.executeWrite.mockImplementationOnce(async (work: any) => work(mockTx));
|
||||
|
||||
const result = await client.mergeEntityPair("keep-id", "remove-id");
|
||||
|
||||
expect(result).toBe(true);
|
||||
// Only 2 calls: transfer (0 results) and delete (skip update)
|
||||
expect(mockTx.run).toHaveBeenCalledTimes(2);
|
||||
});
|
||||
|
||||
it("returns false on error", async () => {
|
||||
mockSession.executeWrite.mockRejectedValueOnce(new Error("Neo4j connection lost"));
|
||||
|
||||
const result = await client.mergeEntityPair("keep-id", "remove-id");
|
||||
|
||||
expect(result).toBe(false);
|
||||
});
|
||||
});
|
||||
|
||||
// --------------------------------------------------------------------------
|
||||
// reconcileEntityMentionCounts()
|
||||
// --------------------------------------------------------------------------
|
||||
|
||||
describe("reconcileEntityMentionCounts", () => {
|
||||
it("updates entities with NULL mentionCount", async () => {
|
||||
mockSession.run.mockResolvedValueOnce({
|
||||
records: [mockRecord({ updated: 42 })],
|
||||
});
|
||||
|
||||
const updated = await client.reconcileEntityMentionCounts();
|
||||
|
||||
expect(updated).toBe(42);
|
||||
const query = mockSession.run.mock.calls[0][0] as string;
|
||||
expect(query).toContain("mentionCount IS NULL");
|
||||
expect(query).toContain("SET e.mentionCount = actual");
|
||||
});
|
||||
|
||||
it("returns 0 when all entities have mentionCount set", async () => {
|
||||
mockSession.run.mockResolvedValueOnce({
|
||||
records: [mockRecord({ updated: 0 })],
|
||||
});
|
||||
|
||||
const updated = await client.reconcileEntityMentionCounts();
|
||||
|
||||
expect(updated).toBe(0);
|
||||
});
|
||||
});
|
||||
});
|
||||
@@ -2022,4 +2022,108 @@ export class Neo4jMemoryClient {
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
/**
|
||||
* Delete non-core, non-pinned memories matching a regex pattern.
|
||||
* Used by the sleep cycle noise pattern cleanup.
|
||||
*
|
||||
* @returns Number of memories deleted
|
||||
*/
|
||||
async deleteMemoriesByPattern(pattern: string, agentId?: string, limit = 100): Promise<number> {
|
||||
await this.ensureInitialized();
|
||||
const session = this.driver!.session();
|
||||
try {
|
||||
const agentFilter = agentId ? "AND m.agentId = $agentId" : "";
|
||||
const result = await session.run(
|
||||
`MATCH (m:Memory)
|
||||
WHERE m.text =~ $pattern
|
||||
AND coalesce(m.userPinned, false) = false
|
||||
AND m.category <> 'core'
|
||||
${agentFilter}
|
||||
WITH m LIMIT ${limit}
|
||||
DETACH DELETE m
|
||||
RETURN count(*) AS removed`,
|
||||
{ pattern, agentId },
|
||||
);
|
||||
return (result.records[0]?.get("removed") as number) ?? 0;
|
||||
} finally {
|
||||
await session.close();
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Fetch all non-core memories (id + text) for a given agent, or all agents.
|
||||
* Used by the sleep cycle credential scanner.
|
||||
*/
|
||||
async fetchNonCoreMemories(agentId?: string): Promise<Array<{ id: string; text: string }>> {
|
||||
await this.ensureInitialized();
|
||||
const session = this.driver!.session();
|
||||
try {
|
||||
const agentFilter = agentId ? "AND m.agentId = $agentId" : "";
|
||||
const result = await session.run(
|
||||
`MATCH (m:Memory)
|
||||
WHERE m.category <> 'core'
|
||||
${agentFilter}
|
||||
RETURN m.id AS id, m.text AS text`,
|
||||
agentId ? { agentId } : {},
|
||||
);
|
||||
return result.records.map((r) => ({
|
||||
id: r.get("id") as string,
|
||||
text: r.get("text") as string,
|
||||
}));
|
||||
} finally {
|
||||
await session.close();
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Delete memories by IDs (DETACH DELETE).
|
||||
* Used by the sleep cycle credential scanner.
|
||||
*
|
||||
* @returns Number of memories deleted
|
||||
*/
|
||||
async deleteMemoriesByIds(ids: string[]): Promise<number> {
|
||||
if (ids.length === 0) {
|
||||
return 0;
|
||||
}
|
||||
await this.ensureInitialized();
|
||||
const session = this.driver!.session();
|
||||
try {
|
||||
const result = await session.run(
|
||||
`UNWIND $ids AS id
|
||||
MATCH (m:Memory {id: id})
|
||||
DETACH DELETE m
|
||||
RETURN count(*) AS removed`,
|
||||
{ ids },
|
||||
);
|
||||
return (result.records[0]?.get("removed") as number) ?? 0;
|
||||
} finally {
|
||||
await session.close();
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Reconcile mentionCount for all entities by counting actual MENTIONS relationships.
|
||||
* Fixes entities with NULL or stale mentionCount values (e.g., entities created
|
||||
* before mentionCount tracking was added).
|
||||
*
|
||||
* @returns Number of entities updated
|
||||
*/
|
||||
async reconcileEntityMentionCounts(): Promise<number> {
|
||||
await this.ensureInitialized();
|
||||
const session = this.driver!.session();
|
||||
try {
|
||||
const result = await session.run(
|
||||
`MATCH (e:Entity)
|
||||
WHERE e.mentionCount IS NULL
|
||||
OPTIONAL MATCH (m:Memory)-[:MENTIONS]->(e)
|
||||
WITH e, count(m) AS actual
|
||||
SET e.mentionCount = actual
|
||||
RETURN count(e) AS updated`,
|
||||
);
|
||||
return (result.records[0]?.get("updated") as number) ?? 0;
|
||||
} finally {
|
||||
await session.close();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
165
extensions/memory-neo4j/sleep-cycle.credential-scan.test.ts
Normal file
165
extensions/memory-neo4j/sleep-cycle.credential-scan.test.ts
Normal file
@@ -0,0 +1,165 @@
|
||||
/**
|
||||
* Tests for credential scanning in the sleep cycle.
|
||||
*
|
||||
* Verifies that CREDENTIAL_PATTERNS and detectCredential() correctly
|
||||
* identify credential-like content in memory text while not flagging
|
||||
* clean text.
|
||||
*/
|
||||
|
||||
import { describe, it, expect } from "vitest";
|
||||
import { CREDENTIAL_PATTERNS, detectCredential } from "./sleep-cycle.js";
|
||||
|
||||
describe("Credential Detection", () => {
|
||||
// --------------------------------------------------------------------------
|
||||
// detectCredential() — should flag dangerous content
|
||||
// --------------------------------------------------------------------------
|
||||
|
||||
describe("should detect credentials", () => {
|
||||
it("detects API keys (sk-...)", () => {
|
||||
const result = detectCredential("Use the key sk-abc123def456ghi789jkl012mno345");
|
||||
expect(result).toBe("API key");
|
||||
});
|
||||
|
||||
it("detects api_key patterns", () => {
|
||||
const result = detectCredential("Set api_key_live_abcdef1234567890abcdef");
|
||||
expect(result).toBe("API key");
|
||||
});
|
||||
|
||||
it("detects Bearer tokens", () => {
|
||||
const result = detectCredential(
|
||||
"Authorization: Bearer eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.payload.signature",
|
||||
);
|
||||
// Could match either Bearer token or JWT — both are valid detections
|
||||
expect(result).not.toBeNull();
|
||||
});
|
||||
|
||||
it("detects password assignments (password: X)", () => {
|
||||
const result = detectCredential("The database password: myS3cretP@ss!");
|
||||
expect(result).toBe("Password assignment");
|
||||
});
|
||||
|
||||
it("detects password assignments (password=X)", () => {
|
||||
const result = detectCredential("config has password=hunter2 in it");
|
||||
expect(result).toBe("Password assignment");
|
||||
});
|
||||
|
||||
it("detects the missed pattern: login with X creds user/pass", () => {
|
||||
const result = detectCredential("login with radarr creds hullah/fuckbar");
|
||||
expect(result).toBe("Credentials (user/pass)");
|
||||
});
|
||||
|
||||
it("detects creds user/pass without login prefix", () => {
|
||||
const result = detectCredential("use creds admin/password123 for the server");
|
||||
expect(result).toBe("Credentials (user/pass)");
|
||||
});
|
||||
|
||||
it("detects URL-embedded credentials", () => {
|
||||
const result = detectCredential("Connect to https://admin:secretpass@db.example.com/mydb");
|
||||
expect(result).toBe("URL credentials");
|
||||
});
|
||||
|
||||
it("detects URL credentials with http://", () => {
|
||||
const result = detectCredential("http://user:pass@192.168.1.1:8080/api");
|
||||
expect(result).toBe("URL credentials");
|
||||
});
|
||||
|
||||
it("detects private keys", () => {
|
||||
const result = detectCredential("-----BEGIN RSA PRIVATE KEY-----\nMIIEow...");
|
||||
expect(result).toBe("Private key");
|
||||
});
|
||||
|
||||
it("detects AWS access keys", () => {
|
||||
const result = detectCredential("AWS_ACCESS_KEY_ID=AKIAIOSFODNN7EXAMPLE");
|
||||
expect(result).toBe("AWS key");
|
||||
});
|
||||
|
||||
it("detects GitHub personal access tokens", () => {
|
||||
const result = detectCredential("Set GITHUB_TOKEN=ghp_ABCDEFabcdef1234567890");
|
||||
expect(result).toBe("GitHub/GitLab token");
|
||||
});
|
||||
|
||||
it("detects GitLab tokens", () => {
|
||||
const result = detectCredential("Use glpat-xxxxxxxxxxxxxxxxxxxx for auth");
|
||||
expect(result).toBe("GitHub/GitLab token");
|
||||
});
|
||||
|
||||
it("detects JWT tokens", () => {
|
||||
const result = detectCredential(
|
||||
"Token: eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJzdWIiOiIxMjM0NTY3ODkwIiwibmFtZSI6IkpvaG4gRG9lIiwiaWF0IjoxNTE2MjM5MDIyfQ.SflKxwRJSMeKKF2QT4fwpMeJf36POk6yJV_adQssw5c",
|
||||
);
|
||||
expect(result).toBe("JWT");
|
||||
});
|
||||
|
||||
it("detects token=value patterns", () => {
|
||||
const result = detectCredential(
|
||||
"Set token=abcdef1234567890abcdef1234567890ab for authentication",
|
||||
);
|
||||
expect(result).toBe("Token/secret");
|
||||
});
|
||||
|
||||
it("detects secret: value patterns", () => {
|
||||
const result = detectCredential(
|
||||
"The client secret: abcdef1234567890abcdef1234567890abcdef12",
|
||||
);
|
||||
expect(result).toBe("Token/secret");
|
||||
});
|
||||
});
|
||||
|
||||
// --------------------------------------------------------------------------
|
||||
// detectCredential() — should NOT flag clean text
|
||||
// --------------------------------------------------------------------------
|
||||
|
||||
describe("should not flag clean text", () => {
|
||||
it("does not flag normal text", () => {
|
||||
expect(detectCredential("Remember to buy groceries tomorrow")).toBeNull();
|
||||
});
|
||||
|
||||
it("does not flag password advice (without actual password)", () => {
|
||||
expect(
|
||||
detectCredential("Make sure the password is at least 8 characters long for security"),
|
||||
).toBeNull();
|
||||
});
|
||||
|
||||
it("does not flag discussion about tokens", () => {
|
||||
expect(detectCredential("We should use JWT tokens for authentication")).toBeNull();
|
||||
});
|
||||
|
||||
it("does not flag short key-like words", () => {
|
||||
expect(detectCredential("The key to success is persistence")).toBeNull();
|
||||
});
|
||||
|
||||
it("does not flag URLs without credentials", () => {
|
||||
expect(detectCredential("Visit https://example.com/api/v1 for docs")).toBeNull();
|
||||
});
|
||||
|
||||
it("does not flag discussion about API key rotation", () => {
|
||||
expect(detectCredential("Rotate your API keys every 90 days as a best practice")).toBeNull();
|
||||
});
|
||||
|
||||
it("does not flag file paths", () => {
|
||||
expect(detectCredential("Credentials are stored in /home/user/.secrets/api.json")).toBeNull();
|
||||
});
|
||||
|
||||
it("does not flag casual use of slash in text", () => {
|
||||
expect(detectCredential("Use the read/write mode for better performance")).toBeNull();
|
||||
});
|
||||
});
|
||||
|
||||
// --------------------------------------------------------------------------
|
||||
// CREDENTIAL_PATTERNS — structural checks
|
||||
// --------------------------------------------------------------------------
|
||||
|
||||
describe("CREDENTIAL_PATTERNS structure", () => {
|
||||
it("has at least 8 patterns", () => {
|
||||
expect(CREDENTIAL_PATTERNS.length).toBeGreaterThanOrEqual(8);
|
||||
});
|
||||
|
||||
it("each pattern has a label and valid RegExp", () => {
|
||||
for (const { pattern, label } of CREDENTIAL_PATTERNS) {
|
||||
expect(pattern).toBeInstanceOf(RegExp);
|
||||
expect(label).toBeTruthy();
|
||||
expect(typeof label).toBe("string");
|
||||
}
|
||||
});
|
||||
});
|
||||
});
|
||||
@@ -1,18 +1,22 @@
|
||||
/**
|
||||
* Eight-phase sleep cycle for memory consolidation.
|
||||
* Multi-phase sleep cycle for memory consolidation.
|
||||
*
|
||||
* Implements a Pareto-based memory ecosystem where core memory
|
||||
* is bounded to the top 20% of memories by effective score.
|
||||
*
|
||||
* Phases:
|
||||
* 1. DEDUPLICATION - Merge near-duplicate memories (reduce redundancy)
|
||||
* 1. DEDUPLICATION - Merge near-duplicate memories (reduce redundancy)
|
||||
* 1b. SEMANTIC DEDUP - LLM-based paraphrase detection
|
||||
* 1c. CONFLICT DETECTION - Resolve contradictory memories
|
||||
* 1d. ENTITY DEDUP - Merge near-duplicate entities (reduce entity bloat)
|
||||
* 2. PARETO SCORING - Calculate effective scores for all memories
|
||||
* 3. CORE PROMOTION - Regular memories above threshold -> core
|
||||
* 4. EXTRACTION - Form entity relationships (strengthen connections)
|
||||
* 5. DECAY/PRUNING - Remove old, low-importance memories (forgetting curve)
|
||||
* 6. CLEANUP - Remove orphaned entities/tags (garbage collection)
|
||||
* 7. NOISE CLEANUP - Remove dangerous pattern memories
|
||||
* 2. PARETO SCORING - Calculate effective scores for all memories
|
||||
* 3. CORE PROMOTION - Regular memories above threshold -> core
|
||||
* 4. EXTRACTION - Form entity relationships (strengthen connections)
|
||||
* 5. DECAY/PRUNING - Remove old, low-importance memories (forgetting curve)
|
||||
* 6. CLEANUP - Remove orphaned entities/tags (garbage collection)
|
||||
* 7. NOISE CLEANUP - Remove dangerous pattern memories
|
||||
* 7b. CREDENTIAL SCAN - Remove memories containing leaked credentials
|
||||
* 8. TASK LEDGER - Archive stale tasks in TASKS.md
|
||||
*
|
||||
* Research basis:
|
||||
* - Pareto principle (20/80 rule) for memory tiering
|
||||
@@ -27,6 +31,7 @@ import type { Neo4jMemoryClient } from "./neo4j-client.js";
|
||||
import type { Logger } from "./schema.js";
|
||||
import { isSemanticDuplicate, resolveConflict, runBackgroundExtraction } from "./extractor.js";
|
||||
import { makePairKey } from "./schema.js";
|
||||
import { reviewAndArchiveStaleTasks, type StaleTaskResult } from "./task-ledger.js";
|
||||
|
||||
/**
|
||||
* Sleep Cycle Result - aggregated stats from all phases.
|
||||
@@ -82,6 +87,18 @@ export type SleepCycleResult = {
|
||||
tagsRemoved: number;
|
||||
singleUseTagsRemoved: number;
|
||||
};
|
||||
// Phase 7b: Credential Scanning
|
||||
credentialScan: {
|
||||
memoriesScanned: number;
|
||||
credentialsFound: number;
|
||||
memoriesRemoved: number;
|
||||
};
|
||||
// Phase 8: Task Ledger Cleanup
|
||||
taskLedger: {
|
||||
staleCount: number;
|
||||
archivedCount: number;
|
||||
archivedIds: string[];
|
||||
};
|
||||
// Overall
|
||||
durationMs: number;
|
||||
aborted: boolean;
|
||||
@@ -120,6 +137,10 @@ export type SleepCycleOptions = {
|
||||
decayImportanceMultiplier?: number; // How much importance extends half-life (default: 2)
|
||||
decayCurves?: Record<string, { halfLifeDays: number }>; // Per-category decay curve overrides
|
||||
|
||||
// Phase 8: Task Ledger
|
||||
workspaceDir?: string; // Workspace dir for TASKS.md (default: resolved from env)
|
||||
staleTaskMaxAgeMs?: number; // Max age before task is stale (default: 24h)
|
||||
|
||||
// Progress callback
|
||||
onPhaseStart?: (
|
||||
phase:
|
||||
@@ -131,11 +152,76 @@ export type SleepCycleOptions = {
|
||||
| "promotion"
|
||||
| "decay"
|
||||
| "extraction"
|
||||
| "cleanup",
|
||||
| "cleanup"
|
||||
| "noiseCleanup"
|
||||
| "credentialScan"
|
||||
| "taskLedger",
|
||||
) => void;
|
||||
onProgress?: (phase: string, message: string) => void;
|
||||
};
|
||||
|
||||
// ============================================================================
|
||||
// Credential Detection Patterns
|
||||
// ============================================================================
|
||||
|
||||
/**
|
||||
* Regex patterns that match credential-like content in memory text.
|
||||
* Used by the credential scanning phase to find and remove memories
|
||||
* that accidentally stored secrets, passwords, API keys, or tokens.
|
||||
*
|
||||
* These are JavaScript RegExp patterns (case-insensitive).
|
||||
*/
|
||||
export const CREDENTIAL_PATTERNS: Array<{ pattern: RegExp; label: string }> = [
|
||||
// API keys: sk-..., api_key_..., api_key_live_..., apikey-..., etc.
|
||||
{ pattern: /\b(?:sk|api[_-]?key(?:[_-]\w+)?)[_-][a-z0-9]{16,}/i, label: "API key" },
|
||||
|
||||
// Bearer tokens
|
||||
{ pattern: /bearer\s+[a-z0-9_\-.]{20,}/i, label: "Bearer token" },
|
||||
|
||||
// JWT tokens (three base64 segments separated by dots) — check before generic token pattern
|
||||
{ pattern: /\beyJ[a-zA-Z0-9_-]{20,}\.[a-zA-Z0-9_-]{20,}\.[a-zA-Z0-9_-]{20,}/i, label: "JWT" },
|
||||
|
||||
// Generic long tokens/secrets (hex or base64, 32+ chars)
|
||||
{
|
||||
pattern: /\b(?:token|secret|key)\s*[:=]\s*["']?[a-z0-9+/=_\-]{32,}["']?/i,
|
||||
label: "Token/secret",
|
||||
},
|
||||
|
||||
// Password patterns: password: X, password=X, password X, passwd=X, pwd=X
|
||||
{
|
||||
pattern: /\b(?:password|passwd|pwd)\s*[:=]\s*["']?\S{4,}["']?/i,
|
||||
label: "Password assignment",
|
||||
},
|
||||
|
||||
// Credentials in "creds user/pass" format: "login with X creds user/pass"
|
||||
{ pattern: /\bcreds?\s+\S+[/\\]\S+/i, label: "Credentials (user/pass)" },
|
||||
|
||||
// URL-embedded credentials: https://user:pass@host
|
||||
{ pattern: /\/\/[^/\s:]+:[^/\s@]+@/i, label: "URL credentials" },
|
||||
|
||||
// Private keys
|
||||
{ pattern: /-----BEGIN\s+(?:RSA\s+)?PRIVATE\s+KEY-----/i, label: "Private key" },
|
||||
|
||||
// AWS-style keys
|
||||
{ pattern: /\b(?:AKIA|ASIA)[A-Z0-9]{16}\b/, label: "AWS key" },
|
||||
|
||||
// GitHub/GitLab tokens
|
||||
{ pattern: /\b(?:ghp|gho|ghu|ghs|ghr|glpat)[_-][a-zA-Z0-9]{16,}/i, label: "GitHub/GitLab token" },
|
||||
];
|
||||
|
||||
/**
|
||||
* Check if a text contains credential-like content.
|
||||
* Returns the first matching pattern label, or null if clean.
|
||||
*/
|
||||
export function detectCredential(text: string): string | null {
|
||||
for (const { pattern, label } of CREDENTIAL_PATTERNS) {
|
||||
if (pattern.test(text)) {
|
||||
return label;
|
||||
}
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
// ============================================================================
|
||||
// Sleep Cycle Implementation
|
||||
// ============================================================================
|
||||
@@ -180,6 +266,8 @@ export async function runSleepCycle(
|
||||
extractionBatchSize = 50,
|
||||
extractionDelayMs = 1000,
|
||||
singleUseTagMinAgeDays = 14,
|
||||
workspaceDir,
|
||||
staleTaskMaxAgeMs,
|
||||
onPhaseStart,
|
||||
onProgress,
|
||||
} = options;
|
||||
@@ -199,6 +287,8 @@ export async function runSleepCycle(
|
||||
decay: { memoriesPruned: 0 },
|
||||
extraction: { total: 0, processed: 0, succeeded: 0, failed: 0 },
|
||||
cleanup: { entitiesRemoved: 0, tagsRemoved: 0, singleUseTagsRemoved: 0 },
|
||||
credentialScan: { memoriesScanned: 0, credentialsFound: 0, memoriesRemoved: 0 },
|
||||
taskLedger: { staleCount: 0, archivedCount: 0, archivedIds: [] },
|
||||
durationMs: 0,
|
||||
aborted: false,
|
||||
};
|
||||
@@ -443,6 +533,15 @@ export async function runSleepCycle(
|
||||
logger.info("memory-neo4j: [sleep] Phase 1d: Entity Deduplication");
|
||||
|
||||
try {
|
||||
// Reconcile NULL mentionCounts before dedup so decisions are based on accurate counts
|
||||
const reconciled = await db.reconcileEntityMentionCounts();
|
||||
if (reconciled > 0) {
|
||||
logger.info(
|
||||
`memory-neo4j: [sleep] Phase 1d: Reconciled mentionCount for ${reconciled} entities`,
|
||||
);
|
||||
onProgress?.("entityDedup", `Reconciled ${reconciled} entity mention counts`);
|
||||
}
|
||||
|
||||
const pairs = await db.findDuplicateEntityPairs(agentId);
|
||||
result.entityDedup.pairsFound = pairs.length;
|
||||
|
||||
@@ -749,6 +848,7 @@ export async function runSleepCycle(
|
||||
// stored (open proposals, action items that trigger rogue sessions).
|
||||
// --------------------------------------------------------------------------
|
||||
if (!abortSignal?.aborted) {
|
||||
onPhaseStart?.("noiseCleanup");
|
||||
logger.info("memory-neo4j: [sleep] Phase 7: Noise Pattern Cleanup");
|
||||
|
||||
try {
|
||||
@@ -763,29 +863,11 @@ export async function runSleepCycle(
|
||||
];
|
||||
|
||||
let noiseRemoved = 0;
|
||||
const noiseSession = (db as any).driver!.session();
|
||||
try {
|
||||
for (const pattern of noisePatterns) {
|
||||
if (abortSignal?.aborted) {
|
||||
break;
|
||||
}
|
||||
|
||||
const agentFilter = agentId ? "AND m.agentId = $agentId" : "";
|
||||
const result = await noiseSession.run(
|
||||
`MATCH (m:Memory)
|
||||
WHERE m.text =~ $pattern
|
||||
AND coalesce(m.userPinned, false) = false
|
||||
AND m.category <> 'core'
|
||||
${agentFilter}
|
||||
WITH m LIMIT 100
|
||||
DETACH DELETE m
|
||||
RETURN count(*) AS removed`,
|
||||
{ pattern: `.*${pattern}.*`, agentId },
|
||||
);
|
||||
noiseRemoved += (result.records[0]?.get("removed") as number) ?? 0;
|
||||
for (const pattern of noisePatterns) {
|
||||
if (abortSignal?.aborted) {
|
||||
break;
|
||||
}
|
||||
} finally {
|
||||
await noiseSession.close();
|
||||
noiseRemoved += await db.deleteMemoriesByPattern(`.*${pattern}.*`, agentId);
|
||||
}
|
||||
|
||||
if (noiseRemoved > 0) {
|
||||
@@ -800,6 +882,90 @@ export async function runSleepCycle(
|
||||
}
|
||||
}
|
||||
|
||||
// --------------------------------------------------------------------------
|
||||
// Phase 7b: Credential Scanning
|
||||
// Scans all memories for accidentally stored credentials (API keys,
|
||||
// passwords, tokens) and removes them. This is a security measure
|
||||
// to prevent credential leaks in the memory store.
|
||||
// --------------------------------------------------------------------------
|
||||
if (!abortSignal?.aborted) {
|
||||
onPhaseStart?.("credentialScan");
|
||||
logger.info("memory-neo4j: [sleep] Phase 7b: Credential Scanning");
|
||||
|
||||
try {
|
||||
const allMemories = await db.fetchNonCoreMemories(agentId);
|
||||
result.credentialScan.memoriesScanned = allMemories.length;
|
||||
|
||||
const toRemove: string[] = [];
|
||||
for (const { id, text } of allMemories) {
|
||||
if (abortSignal?.aborted) {
|
||||
break;
|
||||
}
|
||||
const matched = detectCredential(text);
|
||||
if (matched) {
|
||||
toRemove.push(id);
|
||||
result.credentialScan.credentialsFound++;
|
||||
onProgress?.(
|
||||
"credentialScan",
|
||||
`Found ${matched} in memory ${id.slice(0, 8)}...: "${text.slice(0, 40)}..."`,
|
||||
);
|
||||
logger.warn(
|
||||
`memory-neo4j: [sleep] Credential detected (${matched}) in memory ${id} — removing`,
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
if (toRemove.length > 0) {
|
||||
result.credentialScan.memoriesRemoved = await db.deleteMemoriesByIds(toRemove);
|
||||
}
|
||||
|
||||
logger.info(
|
||||
`memory-neo4j: [sleep] Phase 7b complete — ${result.credentialScan.memoriesScanned} scanned, ${result.credentialScan.credentialsFound} credentials found, ${result.credentialScan.memoriesRemoved} removed`,
|
||||
);
|
||||
} catch (err) {
|
||||
logger.warn(`memory-neo4j: [sleep] Phase 7b error: ${String(err)}`);
|
||||
}
|
||||
}
|
||||
|
||||
// --------------------------------------------------------------------------
|
||||
// Phase 8: Task Ledger Cleanup
|
||||
// Reviews TASKS.md for stale tasks (>24h with no activity) and archives them.
|
||||
// Requires workspaceDir to be provided (otherwise skipped).
|
||||
// --------------------------------------------------------------------------
|
||||
if (!abortSignal?.aborted && workspaceDir) {
|
||||
onPhaseStart?.("taskLedger");
|
||||
logger.info("memory-neo4j: [sleep] Phase 8: Task Ledger Cleanup");
|
||||
|
||||
try {
|
||||
const staleResult = await reviewAndArchiveStaleTasks(workspaceDir, staleTaskMaxAgeMs);
|
||||
|
||||
if (staleResult) {
|
||||
result.taskLedger.staleCount = staleResult.staleCount;
|
||||
result.taskLedger.archivedCount = staleResult.archivedCount;
|
||||
result.taskLedger.archivedIds = staleResult.archivedIds;
|
||||
|
||||
if (staleResult.archivedCount > 0) {
|
||||
onProgress?.(
|
||||
"taskLedger",
|
||||
`Archived ${staleResult.archivedCount} stale tasks: ${staleResult.archivedIds.join(", ")}`,
|
||||
);
|
||||
} else {
|
||||
onProgress?.("taskLedger", "No stale tasks found");
|
||||
}
|
||||
} else {
|
||||
onProgress?.("taskLedger", "TASKS.md not found — skipped");
|
||||
}
|
||||
|
||||
logger.info(
|
||||
`memory-neo4j: [sleep] Phase 8 complete — ${result.taskLedger.archivedCount} stale tasks archived`,
|
||||
);
|
||||
} catch (err) {
|
||||
logger.warn(`memory-neo4j: [sleep] Phase 8 error: ${String(err)}`);
|
||||
}
|
||||
} else if (!workspaceDir) {
|
||||
logger.info("memory-neo4j: [sleep] Phase 8: Task Ledger Cleanup — SKIPPED (no workspace dir)");
|
||||
}
|
||||
|
||||
result.durationMs = Date.now() - startTime;
|
||||
result.aborted = abortSignal?.aborted ?? false;
|
||||
|
||||
|
||||
466
extensions/memory-neo4j/task-ledger.test.ts
Normal file
466
extensions/memory-neo4j/task-ledger.test.ts
Normal file
@@ -0,0 +1,466 @@
|
||||
import fs from "node:fs/promises";
|
||||
import os from "node:os";
|
||||
import path from "node:path";
|
||||
import { afterEach, beforeEach, describe, expect, it } from "vitest";
|
||||
import {
|
||||
findStaleTasks,
|
||||
parseTaskDate,
|
||||
parseTaskLedger,
|
||||
reviewAndArchiveStaleTasks,
|
||||
serializeTask,
|
||||
serializeTaskLedger,
|
||||
} from "./task-ledger.js";
|
||||
|
||||
// ============================================================================
|
||||
// parseTaskDate
|
||||
// ============================================================================
|
||||
|
||||
describe("parseTaskDate", () => {
|
||||
it("parses YYYY-MM-DD HH:MM format", () => {
|
||||
const date = parseTaskDate("2026-02-14 09:15");
|
||||
expect(date).not.toBeNull();
|
||||
expect(date!.getFullYear()).toBe(2026);
|
||||
expect(date!.getMonth()).toBe(1); // February is month 1
|
||||
expect(date!.getDate()).toBe(14);
|
||||
});
|
||||
|
||||
it("parses YYYY-MM-DD HH:MM with timezone abbreviation", () => {
|
||||
const date = parseTaskDate("2026-02-14 09:15 MYT");
|
||||
expect(date).not.toBeNull();
|
||||
expect(date!.getFullYear()).toBe(2026);
|
||||
});
|
||||
|
||||
it("parses ISO format", () => {
|
||||
const date = parseTaskDate("2026-02-14T09:15:00");
|
||||
expect(date).not.toBeNull();
|
||||
expect(date!.getFullYear()).toBe(2026);
|
||||
});
|
||||
|
||||
it("returns null for empty string", () => {
|
||||
expect(parseTaskDate("")).toBeNull();
|
||||
});
|
||||
|
||||
it("returns null for invalid date", () => {
|
||||
expect(parseTaskDate("not-a-date")).toBeNull();
|
||||
});
|
||||
});
|
||||
|
||||
// ============================================================================
|
||||
// parseTaskLedger
|
||||
// ============================================================================
|
||||
|
||||
describe("parseTaskLedger", () => {
|
||||
it("parses a simple task ledger", () => {
|
||||
const content = [
|
||||
"# Active Tasks",
|
||||
"",
|
||||
"## TASK-001: Restaurant Booking",
|
||||
"- **Status:** in_progress",
|
||||
"- **Started:** 2026-02-14 09:15",
|
||||
"- **Updated:** 2026-02-14 09:30",
|
||||
"- **Details:** Graze, 4 pax, 19:30",
|
||||
"- **Current Step:** Form filled, awaiting confirmation",
|
||||
"",
|
||||
"# Completed",
|
||||
"<!-- Move done tasks here with completion date -->",
|
||||
].join("\n");
|
||||
|
||||
const ledger = parseTaskLedger(content);
|
||||
expect(ledger.activeTasks).toHaveLength(1);
|
||||
expect(ledger.completedTasks).toHaveLength(0);
|
||||
|
||||
const task = ledger.activeTasks[0];
|
||||
expect(task.id).toBe("TASK-001");
|
||||
expect(task.title).toBe("Restaurant Booking");
|
||||
expect(task.status).toBe("in_progress");
|
||||
expect(task.started).toBe("2026-02-14 09:15");
|
||||
expect(task.updated).toBe("2026-02-14 09:30");
|
||||
expect(task.details).toBe("Graze, 4 pax, 19:30");
|
||||
expect(task.currentStep).toBe("Form filled, awaiting confirmation");
|
||||
expect(task.isCompleted).toBe(false);
|
||||
});
|
||||
|
||||
it("parses multiple active tasks", () => {
|
||||
const content = [
|
||||
"# Active Tasks",
|
||||
"",
|
||||
"## TASK-001: Task One",
|
||||
"- **Status:** in_progress",
|
||||
"- **Started:** 2026-02-14 09:00",
|
||||
"",
|
||||
"## TASK-002: Task Two",
|
||||
"- **Status:** awaiting_input",
|
||||
"- **Started:** 2026-02-14 10:00",
|
||||
"",
|
||||
"# Completed",
|
||||
].join("\n");
|
||||
|
||||
const ledger = parseTaskLedger(content);
|
||||
expect(ledger.activeTasks).toHaveLength(2);
|
||||
expect(ledger.activeTasks[0].id).toBe("TASK-001");
|
||||
expect(ledger.activeTasks[1].id).toBe("TASK-002");
|
||||
});
|
||||
|
||||
it("parses completed tasks", () => {
|
||||
const content = [
|
||||
"# Active Tasks",
|
||||
"",
|
||||
"# Completed",
|
||||
"",
|
||||
"## ~~TASK-001: Old Task~~",
|
||||
"- **Status:** done",
|
||||
"- **Started:** 2026-02-13 09:00",
|
||||
"- **Updated:** 2026-02-13 15:00",
|
||||
].join("\n");
|
||||
|
||||
const ledger = parseTaskLedger(content);
|
||||
expect(ledger.activeTasks).toHaveLength(0);
|
||||
expect(ledger.completedTasks).toHaveLength(1);
|
||||
expect(ledger.completedTasks[0].id).toBe("TASK-001");
|
||||
expect(ledger.completedTasks[0].isCompleted).toBe(true);
|
||||
});
|
||||
|
||||
it("parses blocked tasks", () => {
|
||||
const content = [
|
||||
"# Active Tasks",
|
||||
"",
|
||||
"## TASK-001: Blocked Task",
|
||||
"- **Status:** blocked",
|
||||
"- **Started:** 2026-02-14 09:00",
|
||||
"- **Blocked On:** Waiting for API key",
|
||||
"",
|
||||
"# Completed",
|
||||
].join("\n");
|
||||
|
||||
const ledger = parseTaskLedger(content);
|
||||
expect(ledger.activeTasks).toHaveLength(1);
|
||||
expect(ledger.activeTasks[0].blockedOn).toBe("Waiting for API key");
|
||||
});
|
||||
|
||||
it("handles empty task ledger", () => {
|
||||
const content = [
|
||||
"# Active Tasks",
|
||||
"",
|
||||
"# Completed",
|
||||
"<!-- Move done tasks here with completion date -->",
|
||||
].join("\n");
|
||||
|
||||
const ledger = parseTaskLedger(content);
|
||||
expect(ledger.activeTasks).toHaveLength(0);
|
||||
expect(ledger.completedTasks).toHaveLength(0);
|
||||
});
|
||||
|
||||
it("handles Last Updated field variant", () => {
|
||||
const content = [
|
||||
"# Active Tasks",
|
||||
"",
|
||||
"## TASK-001: Some Task",
|
||||
"- **Status:** in_progress",
|
||||
"- **Last Updated:** 2026-02-14 10:00",
|
||||
"",
|
||||
"# Completed",
|
||||
].join("\n");
|
||||
|
||||
const ledger = parseTaskLedger(content);
|
||||
expect(ledger.activeTasks[0].updated).toBe("2026-02-14 10:00");
|
||||
});
|
||||
});
|
||||
|
||||
// ============================================================================
|
||||
// findStaleTasks
|
||||
// ============================================================================
|
||||
|
||||
describe("findStaleTasks", () => {
|
||||
const now = new Date("2026-02-15T10:00:00");
|
||||
const twentyFourHoursMs = 24 * 60 * 60 * 1000;
|
||||
|
||||
it("identifies tasks older than 24h as stale", () => {
|
||||
const tasks = [
|
||||
{
|
||||
id: "TASK-001",
|
||||
title: "Old Task",
|
||||
status: "in_progress" as const,
|
||||
updated: "2026-02-14 08:00",
|
||||
rawLines: [],
|
||||
isCompleted: false,
|
||||
},
|
||||
];
|
||||
|
||||
const stale = findStaleTasks(tasks, now, twentyFourHoursMs);
|
||||
expect(stale).toHaveLength(1);
|
||||
expect(stale[0].id).toBe("TASK-001");
|
||||
});
|
||||
|
||||
it("does not mark recent tasks as stale", () => {
|
||||
const tasks = [
|
||||
{
|
||||
id: "TASK-001",
|
||||
title: "Recent Task",
|
||||
status: "in_progress" as const,
|
||||
updated: "2026-02-15 09:00",
|
||||
rawLines: [],
|
||||
isCompleted: false,
|
||||
},
|
||||
];
|
||||
|
||||
const stale = findStaleTasks(tasks, now, twentyFourHoursMs);
|
||||
expect(stale).toHaveLength(0);
|
||||
});
|
||||
|
||||
it("skips done tasks", () => {
|
||||
const tasks = [
|
||||
{
|
||||
id: "TASK-001",
|
||||
title: "Done Task",
|
||||
status: "done" as const,
|
||||
updated: "2026-02-13 08:00",
|
||||
rawLines: [],
|
||||
isCompleted: false,
|
||||
},
|
||||
];
|
||||
|
||||
const stale = findStaleTasks(tasks, now, twentyFourHoursMs);
|
||||
expect(stale).toHaveLength(0);
|
||||
});
|
||||
|
||||
it("skips already-stale tasks", () => {
|
||||
const tasks = [
|
||||
{
|
||||
id: "TASK-001",
|
||||
title: "Already Stale",
|
||||
status: "stale" as const,
|
||||
updated: "2026-02-13 08:00",
|
||||
rawLines: [],
|
||||
isCompleted: false,
|
||||
},
|
||||
];
|
||||
|
||||
const stale = findStaleTasks(tasks, now, twentyFourHoursMs);
|
||||
expect(stale).toHaveLength(0);
|
||||
});
|
||||
|
||||
it("uses started date when updated is missing", () => {
|
||||
const tasks = [
|
||||
{
|
||||
id: "TASK-001",
|
||||
title: "No Update Date",
|
||||
status: "in_progress" as const,
|
||||
started: "2026-02-14 08:00",
|
||||
rawLines: [],
|
||||
isCompleted: false,
|
||||
},
|
||||
];
|
||||
|
||||
const stale = findStaleTasks(tasks, now, twentyFourHoursMs);
|
||||
expect(stale).toHaveLength(1);
|
||||
});
|
||||
|
||||
it("marks tasks with no dates as stale", () => {
|
||||
const tasks = [
|
||||
{
|
||||
id: "TASK-001",
|
||||
title: "No Dates",
|
||||
status: "in_progress" as const,
|
||||
rawLines: [],
|
||||
isCompleted: false,
|
||||
},
|
||||
];
|
||||
|
||||
const stale = findStaleTasks(tasks, now, twentyFourHoursMs);
|
||||
expect(stale).toHaveLength(1);
|
||||
});
|
||||
});
|
||||
|
||||
// ============================================================================
|
||||
// serializeTask / serializeTaskLedger
|
||||
// ============================================================================
|
||||
|
||||
describe("serializeTask", () => {
|
||||
it("serializes an active task", () => {
|
||||
const task = {
|
||||
id: "TASK-001",
|
||||
title: "My Task",
|
||||
status: "in_progress" as const,
|
||||
started: "2026-02-14 09:00",
|
||||
updated: "2026-02-14 10:00",
|
||||
details: "Some details",
|
||||
currentStep: "Step 1",
|
||||
rawLines: [],
|
||||
isCompleted: false,
|
||||
};
|
||||
|
||||
const lines = serializeTask(task);
|
||||
expect(lines[0]).toBe("## TASK-001: My Task");
|
||||
expect(lines).toContain("- **Status:** in_progress");
|
||||
expect(lines).toContain("- **Started:** 2026-02-14 09:00");
|
||||
expect(lines).toContain("- **Updated:** 2026-02-14 10:00");
|
||||
expect(lines).toContain("- **Details:** Some details");
|
||||
expect(lines).toContain("- **Current Step:** Step 1");
|
||||
});
|
||||
|
||||
it("serializes a completed task with strikethrough", () => {
|
||||
const task = {
|
||||
id: "TASK-001",
|
||||
title: "Done Task",
|
||||
status: "done" as const,
|
||||
started: "2026-02-14 09:00",
|
||||
rawLines: [],
|
||||
isCompleted: true,
|
||||
};
|
||||
|
||||
const lines = serializeTask(task);
|
||||
expect(lines[0]).toBe("## ~~TASK-001: Done Task~~");
|
||||
});
|
||||
});
|
||||
|
||||
describe("serializeTaskLedger", () => {
|
||||
it("round-trips a task ledger", () => {
|
||||
const ledger = {
|
||||
activeTasks: [
|
||||
{
|
||||
id: "TASK-001",
|
||||
title: "Active Task",
|
||||
status: "in_progress" as const,
|
||||
started: "2026-02-14 09:00",
|
||||
updated: "2026-02-14 10:00",
|
||||
details: "Details here",
|
||||
rawLines: [],
|
||||
isCompleted: false,
|
||||
},
|
||||
],
|
||||
completedTasks: [
|
||||
{
|
||||
id: "TASK-000",
|
||||
title: "Old Task",
|
||||
status: "done" as const,
|
||||
started: "2026-02-13 09:00",
|
||||
rawLines: [],
|
||||
isCompleted: true,
|
||||
},
|
||||
],
|
||||
preamble: [],
|
||||
sectionSeparator: [],
|
||||
postamble: [],
|
||||
};
|
||||
|
||||
const serialized = serializeTaskLedger(ledger);
|
||||
expect(serialized).toContain("# Active Tasks");
|
||||
expect(serialized).toContain("## TASK-001: Active Task");
|
||||
expect(serialized).toContain("# Completed");
|
||||
expect(serialized).toContain("## ~~TASK-000: Old Task~~");
|
||||
});
|
||||
});
|
||||
|
||||
// ============================================================================
|
||||
// reviewAndArchiveStaleTasks (integration with filesystem)
|
||||
// ============================================================================
|
||||
|
||||
describe("reviewAndArchiveStaleTasks", () => {
|
||||
let tmpDir: string;
|
||||
|
||||
beforeEach(async () => {
|
||||
tmpDir = await fs.mkdtemp(path.join(os.tmpdir(), "task-ledger-test-"));
|
||||
});
|
||||
|
||||
afterEach(async () => {
|
||||
await fs.rm(tmpDir, { recursive: true, force: true });
|
||||
});
|
||||
|
||||
it("returns null when TASKS.md does not exist", async () => {
|
||||
const result = await reviewAndArchiveStaleTasks(tmpDir);
|
||||
expect(result).toBeNull();
|
||||
});
|
||||
|
||||
it("returns null for empty TASKS.md", async () => {
|
||||
await fs.writeFile(path.join(tmpDir, "TASKS.md"), "", "utf-8");
|
||||
const result = await reviewAndArchiveStaleTasks(tmpDir);
|
||||
expect(result).toBeNull();
|
||||
});
|
||||
|
||||
it("archives stale tasks", async () => {
|
||||
const content = [
|
||||
"# Active Tasks",
|
||||
"",
|
||||
"## TASK-001: Stale Task",
|
||||
"- **Status:** in_progress",
|
||||
"- **Started:** 2026-02-13 08:00",
|
||||
"- **Updated:** 2026-02-13 09:00",
|
||||
"",
|
||||
"## TASK-002: Fresh Task",
|
||||
"- **Status:** in_progress",
|
||||
"- **Started:** 2026-02-14 09:00",
|
||||
"- **Updated:** 2026-02-14 23:00",
|
||||
"",
|
||||
"# Completed",
|
||||
"<!-- Move done tasks here with completion date -->",
|
||||
].join("\n");
|
||||
|
||||
await fs.writeFile(path.join(tmpDir, "TASKS.md"), content, "utf-8");
|
||||
|
||||
// "now" is Feb 15, 10:00 — TASK-001 updated Feb 13, 09:00 (>24h ago), TASK-002 updated Feb 14, 23:00 (<24h ago)
|
||||
const now = new Date("2026-02-15T10:00:00");
|
||||
const result = await reviewAndArchiveStaleTasks(tmpDir, undefined, now);
|
||||
|
||||
expect(result).not.toBeNull();
|
||||
expect(result!.staleCount).toBe(1);
|
||||
expect(result!.archivedCount).toBe(1);
|
||||
expect(result!.archivedIds).toEqual(["TASK-001"]);
|
||||
|
||||
// Verify the file was updated
|
||||
const updated = await fs.readFile(path.join(tmpDir, "TASKS.md"), "utf-8");
|
||||
expect(updated).toContain("## TASK-002: Fresh Task");
|
||||
expect(updated).toContain("## ~~TASK-001: Stale Task~~");
|
||||
|
||||
// Re-parse to verify structure
|
||||
const ledger = parseTaskLedger(updated);
|
||||
expect(ledger.activeTasks).toHaveLength(1);
|
||||
expect(ledger.activeTasks[0].id).toBe("TASK-002");
|
||||
expect(ledger.completedTasks).toHaveLength(1);
|
||||
expect(ledger.completedTasks[0].id).toBe("TASK-001");
|
||||
expect(ledger.completedTasks[0].status).toBe("stale");
|
||||
});
|
||||
|
||||
it("does nothing when no tasks are stale", async () => {
|
||||
const content = [
|
||||
"# Active Tasks",
|
||||
"",
|
||||
"## TASK-001: Fresh Task",
|
||||
"- **Status:** in_progress",
|
||||
"- **Started:** 2026-02-15 09:00",
|
||||
"- **Updated:** 2026-02-15 09:30",
|
||||
"",
|
||||
"# Completed",
|
||||
].join("\n");
|
||||
|
||||
await fs.writeFile(path.join(tmpDir, "TASKS.md"), content, "utf-8");
|
||||
|
||||
const now = new Date("2026-02-15T10:00:00");
|
||||
const result = await reviewAndArchiveStaleTasks(tmpDir, undefined, now);
|
||||
|
||||
expect(result).not.toBeNull();
|
||||
expect(result!.staleCount).toBe(0);
|
||||
expect(result!.archivedCount).toBe(0);
|
||||
});
|
||||
|
||||
it("supports custom maxAgeMs", async () => {
|
||||
const content = [
|
||||
"# Active Tasks",
|
||||
"",
|
||||
"## TASK-001: Semi-Fresh Task",
|
||||
"- **Status:** in_progress",
|
||||
"- **Started:** 2026-02-15 06:00",
|
||||
"- **Updated:** 2026-02-15 06:00",
|
||||
"",
|
||||
"# Completed",
|
||||
].join("\n");
|
||||
|
||||
await fs.writeFile(path.join(tmpDir, "TASKS.md"), content, "utf-8");
|
||||
|
||||
const now = new Date("2026-02-15T10:00:00");
|
||||
const oneHourMs = 60 * 60 * 1000;
|
||||
|
||||
// With 1-hour threshold, task is stale (4 hours old)
|
||||
const result = await reviewAndArchiveStaleTasks(tmpDir, oneHourMs, now);
|
||||
expect(result!.archivedCount).toBe(1);
|
||||
});
|
||||
});
|
||||
416
extensions/memory-neo4j/task-ledger.ts
Normal file
416
extensions/memory-neo4j/task-ledger.ts
Normal file
@@ -0,0 +1,416 @@
|
||||
/**
|
||||
* Task Ledger (TASKS.md) maintenance utilities.
|
||||
*
|
||||
* Parses and updates the structured task ledger file used by agents
|
||||
* to track active work across compaction events. The sleep cycle uses
|
||||
* these utilities to archive stale tasks (>24h with no activity).
|
||||
*/
|
||||
|
||||
import fs from "node:fs/promises";
|
||||
import path from "node:path";
|
||||
|
||||
// ============================================================================
|
||||
// Types
|
||||
// ============================================================================
|
||||
|
||||
export type TaskStatus = "in_progress" | "awaiting_input" | "blocked" | "done" | "stale" | string;
|
||||
|
||||
export type ParsedTask = {
|
||||
/** Task ID (e.g. "TASK-001") */
|
||||
id: string;
|
||||
/** Short title */
|
||||
title: string;
|
||||
/** Current status */
|
||||
status: TaskStatus;
|
||||
/** When the task was started (ISO-ish string) */
|
||||
started?: string;
|
||||
/** When the task was last updated (ISO-ish string) */
|
||||
updated?: string;
|
||||
/** Task details/description */
|
||||
details?: string;
|
||||
/** Current step being worked on */
|
||||
currentStep?: string;
|
||||
/** What's blocking progress */
|
||||
blockedOn?: string;
|
||||
/** Raw markdown lines for this task section (for round-tripping) */
|
||||
rawLines: string[];
|
||||
/** Whether this task is in the completed section */
|
||||
isCompleted: boolean;
|
||||
};
|
||||
|
||||
export type TaskLedger = {
|
||||
activeTasks: ParsedTask[];
|
||||
completedTasks: ParsedTask[];
|
||||
/** Lines before the first task section (header, etc.) */
|
||||
preamble: string[];
|
||||
/** Lines between active and completed sections */
|
||||
sectionSeparator: string[];
|
||||
/** Lines after the completed section */
|
||||
postamble: string[];
|
||||
};
|
||||
|
||||
export type StaleTaskResult = {
|
||||
/** Number of tasks found that are stale */
|
||||
staleCount: number;
|
||||
/** Number of tasks archived (moved to completed) */
|
||||
archivedCount: number;
|
||||
/** Task IDs that were archived */
|
||||
archivedIds: string[];
|
||||
};
|
||||
|
||||
// ============================================================================
|
||||
// Parsing
|
||||
// ============================================================================
|
||||
|
||||
/**
|
||||
* Parse a TASKS.md file content into structured task data.
|
||||
*/
|
||||
export function parseTaskLedger(content: string): TaskLedger {
|
||||
const lines = content.split("\n");
|
||||
const activeTasks: ParsedTask[] = [];
|
||||
const completedTasks: ParsedTask[] = [];
|
||||
const preamble: string[] = [];
|
||||
const sectionSeparator: string[] = [];
|
||||
const postamble: string[] = [];
|
||||
|
||||
let currentSection: "preamble" | "active" | "completed" | "postamble" = "preamble";
|
||||
let currentTask: ParsedTask | null = null;
|
||||
|
||||
for (const line of lines) {
|
||||
const trimmed = line.trim();
|
||||
|
||||
// Detect section headers
|
||||
if (/^#\s+Active\s+Tasks/i.test(trimmed)) {
|
||||
if (currentTask) {
|
||||
pushTask(currentTask, activeTasks, completedTasks);
|
||||
currentTask = null;
|
||||
}
|
||||
currentSection = "active";
|
||||
preamble.push(line);
|
||||
continue;
|
||||
}
|
||||
|
||||
if (/^#\s+Completed/i.test(trimmed)) {
|
||||
if (currentTask) {
|
||||
pushTask(currentTask, activeTasks, completedTasks);
|
||||
currentTask = null;
|
||||
}
|
||||
currentSection = "completed";
|
||||
sectionSeparator.push(line);
|
||||
continue;
|
||||
}
|
||||
|
||||
// Detect task headers (## TASK-NNN: Title or ## ~~TASK-NNN: Title~~)
|
||||
const taskMatch = trimmed.match(/^##\s+(?:~~)?(TASK-\d+):\s*(.+?)(?:~~)?$/);
|
||||
if (taskMatch) {
|
||||
if (currentTask) {
|
||||
pushTask(currentTask, activeTasks, completedTasks);
|
||||
}
|
||||
const isStrikethrough = trimmed.includes("~~");
|
||||
currentTask = {
|
||||
id: taskMatch[1],
|
||||
title: taskMatch[2].replace(/~~/g, "").trim(),
|
||||
status: isStrikethrough ? "done" : "in_progress",
|
||||
rawLines: [line],
|
||||
isCompleted: currentSection === "completed" || isStrikethrough,
|
||||
};
|
||||
continue;
|
||||
}
|
||||
|
||||
// Parse task fields (- **Field:** Value)
|
||||
if (currentTask) {
|
||||
const fieldMatch = trimmed.match(/^-\s+\*\*(.+?):\*\*\s*(.*)$/);
|
||||
if (fieldMatch) {
|
||||
const fieldName = fieldMatch[1].toLowerCase();
|
||||
const value = fieldMatch[2].trim();
|
||||
switch (fieldName) {
|
||||
case "status":
|
||||
currentTask.status = value;
|
||||
break;
|
||||
case "started":
|
||||
currentTask.started = value;
|
||||
break;
|
||||
case "updated":
|
||||
case "last updated":
|
||||
currentTask.updated = value;
|
||||
break;
|
||||
case "details":
|
||||
currentTask.details = value;
|
||||
break;
|
||||
case "current step":
|
||||
currentTask.currentStep = value;
|
||||
break;
|
||||
case "blocked on":
|
||||
currentTask.blockedOn = value;
|
||||
break;
|
||||
}
|
||||
currentTask.rawLines.push(line);
|
||||
continue;
|
||||
}
|
||||
|
||||
// Non-field lines within a task
|
||||
if (trimmed !== "" && !trimmed.startsWith("#")) {
|
||||
currentTask.rawLines.push(line);
|
||||
continue;
|
||||
}
|
||||
|
||||
// Empty line within a task — include it
|
||||
if (trimmed === "") {
|
||||
currentTask.rawLines.push(line);
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
// Lines not part of a task
|
||||
switch (currentSection) {
|
||||
case "preamble":
|
||||
case "active":
|
||||
preamble.push(line);
|
||||
break;
|
||||
case "completed":
|
||||
sectionSeparator.push(line);
|
||||
break;
|
||||
case "postamble":
|
||||
postamble.push(line);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
// Push the last task
|
||||
if (currentTask) {
|
||||
pushTask(currentTask, activeTasks, completedTasks);
|
||||
}
|
||||
|
||||
return { activeTasks, completedTasks, preamble, sectionSeparator, postamble };
|
||||
}
|
||||
|
||||
function pushTask(task: ParsedTask, active: ParsedTask[], completed: ParsedTask[]) {
|
||||
if (task.isCompleted || task.status === "done") {
|
||||
completed.push(task);
|
||||
} else {
|
||||
active.push(task);
|
||||
}
|
||||
}
|
||||
|
||||
// ============================================================================
|
||||
// Staleness Detection
|
||||
// ============================================================================
|
||||
|
||||
/**
|
||||
* Parse a date string from the task ledger.
|
||||
* Accepts formats like "2026-02-14 09:15", "2026-02-14 09:15 MYT",
|
||||
* "2026-02-14T09:15:00", etc.
|
||||
*/
|
||||
export function parseTaskDate(dateStr: string): Date | null {
|
||||
if (!dateStr) {
|
||||
return null;
|
||||
}
|
||||
const cleaned = dateStr
|
||||
.trim()
|
||||
// Remove timezone abbreviations like MYT, UTC, PST
|
||||
.replace(/\s+[A-Z]{2,5}$/, "")
|
||||
// Normalize space-separated date time to ISO
|
||||
.replace(/^(\d{4}-\d{2}-\d{2})\s+(\d{2}:\d{2})/, "$1T$2");
|
||||
|
||||
const date = new Date(cleaned);
|
||||
if (Number.isNaN(date.getTime())) {
|
||||
return null;
|
||||
}
|
||||
return date;
|
||||
}
|
||||
|
||||
/**
|
||||
* Find tasks that are stale (no update in more than `maxAgeMs` milliseconds).
|
||||
* Default: 24 hours.
|
||||
*/
|
||||
export function findStaleTasks(
|
||||
tasks: ParsedTask[],
|
||||
now: Date = new Date(),
|
||||
maxAgeMs: number = 24 * 60 * 60 * 1000,
|
||||
): ParsedTask[] {
|
||||
return tasks.filter((task) => {
|
||||
// Only check active tasks (not already done/stale)
|
||||
if (task.status === "done" || task.status === "stale") {
|
||||
return false;
|
||||
}
|
||||
|
||||
const lastUpdate = task.updated || task.started;
|
||||
if (!lastUpdate) {
|
||||
// No date info — consider stale if we can't determine age
|
||||
return true;
|
||||
}
|
||||
|
||||
const date = parseTaskDate(lastUpdate);
|
||||
if (!date) {
|
||||
return false; // Can't parse date — don't mark as stale
|
||||
}
|
||||
|
||||
const ageMs = now.getTime() - date.getTime();
|
||||
return ageMs > maxAgeMs;
|
||||
});
|
||||
}
|
||||
|
||||
// ============================================================================
|
||||
// Task Ledger Serialization
|
||||
// ============================================================================
|
||||
|
||||
/**
|
||||
* Serialize a task back to markdown lines.
|
||||
* If the task has rawLines from parsing, regenerate only the header and status
|
||||
* (which may have changed) while preserving other raw content.
|
||||
* For new/modified tasks without rawLines, generate from parsed fields.
|
||||
*/
|
||||
export function serializeTask(task: ParsedTask): string[] {
|
||||
const titlePrefix = task.isCompleted
|
||||
? `## ~~${task.id}: ${task.title}~~`
|
||||
: `## ${task.id}: ${task.title}`;
|
||||
|
||||
// If we have rawLines and the task was only modified (status/updated changed
|
||||
// by archival), rebuild from rawLines with updated field values.
|
||||
if (task.rawLines.length > 0) {
|
||||
const lines: string[] = [titlePrefix];
|
||||
for (const line of task.rawLines.slice(1)) {
|
||||
const trimmed = line.trim();
|
||||
// Replace Status field with current value
|
||||
if (/^-\s+\*\*Status:\*\*/.test(trimmed)) {
|
||||
lines.push(`- **Status:** ${task.status}`);
|
||||
} else if (/^-\s+\*\*(?:Updated|Last Updated):\*\*/.test(trimmed)) {
|
||||
lines.push(`- **Updated:** ${task.updated ?? ""}`);
|
||||
} else {
|
||||
lines.push(line);
|
||||
}
|
||||
}
|
||||
return lines;
|
||||
}
|
||||
|
||||
// Fallback: generate from parsed fields (for newly created tasks)
|
||||
const lines: string[] = [titlePrefix];
|
||||
lines.push(`- **Status:** ${task.status}`);
|
||||
if (task.started) {
|
||||
lines.push(`- **Started:** ${task.started}`);
|
||||
}
|
||||
if (task.updated) {
|
||||
lines.push(`- **Updated:** ${task.updated}`);
|
||||
}
|
||||
if (task.details) {
|
||||
lines.push(`- **Details:** ${task.details}`);
|
||||
}
|
||||
if (task.currentStep) {
|
||||
lines.push(`- **Current Step:** ${task.currentStep}`);
|
||||
}
|
||||
if (task.blockedOn) {
|
||||
lines.push(`- **Blocked On:** ${task.blockedOn}`);
|
||||
}
|
||||
return lines;
|
||||
}
|
||||
|
||||
/**
|
||||
* Serialize the full task ledger back to markdown.
|
||||
* Preserves preamble, section separators, and postamble from the original parse.
|
||||
*/
|
||||
export function serializeTaskLedger(ledger: TaskLedger): string {
|
||||
const lines: string[] = [];
|
||||
|
||||
// Use original preamble if available, otherwise generate header
|
||||
if (ledger.preamble.length > 0) {
|
||||
lines.push(...ledger.preamble);
|
||||
} else {
|
||||
lines.push("# Active Tasks");
|
||||
lines.push("");
|
||||
}
|
||||
|
||||
// Active tasks
|
||||
for (const task of ledger.activeTasks) {
|
||||
lines.push(...serializeTask(task));
|
||||
lines.push("");
|
||||
}
|
||||
|
||||
// Use original section separator if available, otherwise generate
|
||||
if (ledger.sectionSeparator.length > 0) {
|
||||
lines.push(...ledger.sectionSeparator);
|
||||
} else {
|
||||
lines.push("# Completed");
|
||||
lines.push("<!-- Move done tasks here with completion date -->");
|
||||
}
|
||||
lines.push("");
|
||||
|
||||
// Completed tasks
|
||||
for (const task of ledger.completedTasks) {
|
||||
lines.push(...serializeTask(task));
|
||||
lines.push("");
|
||||
}
|
||||
|
||||
// Preserve postamble
|
||||
if (ledger.postamble.length > 0) {
|
||||
lines.push(...ledger.postamble);
|
||||
}
|
||||
|
||||
return lines.join("\n").trimEnd() + "\n";
|
||||
}
|
||||
|
||||
// ============================================================================
|
||||
// Sleep Cycle Integration
|
||||
// ============================================================================
|
||||
|
||||
/**
|
||||
* Review TASKS.md for stale tasks and archive them.
|
||||
* This is called during the sleep cycle.
|
||||
*
|
||||
* @param workspaceDir - Path to the workspace directory
|
||||
* @param maxAgeMs - Maximum age before a task is considered stale (default: 24h)
|
||||
* @param now - Current time (for testing)
|
||||
* @returns Result of the stale task review, or null if TASKS.md doesn't exist
|
||||
*/
|
||||
export async function reviewAndArchiveStaleTasks(
|
||||
workspaceDir: string,
|
||||
maxAgeMs: number = 24 * 60 * 60 * 1000,
|
||||
now: Date = new Date(),
|
||||
): Promise<StaleTaskResult | null> {
|
||||
const tasksPath = path.join(workspaceDir, "TASKS.md");
|
||||
|
||||
let content: string;
|
||||
try {
|
||||
content = await fs.readFile(tasksPath, "utf-8");
|
||||
} catch {
|
||||
// TASKS.md doesn't exist — nothing to do
|
||||
return null;
|
||||
}
|
||||
|
||||
if (!content.trim()) {
|
||||
return null;
|
||||
}
|
||||
|
||||
const ledger = parseTaskLedger(content);
|
||||
const staleTasks = findStaleTasks(ledger.activeTasks, now, maxAgeMs);
|
||||
|
||||
if (staleTasks.length === 0) {
|
||||
return { staleCount: 0, archivedCount: 0, archivedIds: [] };
|
||||
}
|
||||
|
||||
const archivedIds: string[] = [];
|
||||
const nowStr = `${now.getFullYear()}-${String(now.getMonth() + 1).padStart(2, "0")}-${String(now.getDate()).padStart(2, "0")} ${String(now.getHours()).padStart(2, "0")}:${String(now.getMinutes()).padStart(2, "0")}`;
|
||||
|
||||
for (const task of staleTasks) {
|
||||
task.status = "stale";
|
||||
task.updated = nowStr;
|
||||
task.isCompleted = true;
|
||||
|
||||
// Move from active to completed
|
||||
const idx = ledger.activeTasks.indexOf(task);
|
||||
if (idx !== -1) {
|
||||
ledger.activeTasks.splice(idx, 1);
|
||||
}
|
||||
ledger.completedTasks.push(task);
|
||||
archivedIds.push(task.id);
|
||||
}
|
||||
|
||||
// Write back
|
||||
const updated = serializeTaskLedger(ledger);
|
||||
await fs.writeFile(tasksPath, updated, "utf-8");
|
||||
|
||||
return {
|
||||
staleCount: staleTasks.length,
|
||||
archivedCount: archivedIds.length,
|
||||
archivedIds,
|
||||
};
|
||||
}
|
||||
@@ -0,0 +1,64 @@
|
||||
import { describe, expect, it } from "vitest";
|
||||
import { DEFAULT_COMPACTION_INSTRUCTIONS } from "./compact.js";
|
||||
|
||||
describe("DEFAULT_COMPACTION_INSTRUCTIONS", () => {
|
||||
it("contains priority ordering with numbered items", () => {
|
||||
expect(DEFAULT_COMPACTION_INSTRUCTIONS).toContain("1.");
|
||||
expect(DEFAULT_COMPACTION_INSTRUCTIONS).toContain("2.");
|
||||
expect(DEFAULT_COMPACTION_INSTRUCTIONS).toContain("3.");
|
||||
expect(DEFAULT_COMPACTION_INSTRUCTIONS).toContain("4.");
|
||||
expect(DEFAULT_COMPACTION_INSTRUCTIONS).toContain("5.");
|
||||
});
|
||||
|
||||
it("prioritizes active tasks first", () => {
|
||||
const taskLine = DEFAULT_COMPACTION_INSTRUCTIONS.indexOf("active or in-progress tasks");
|
||||
const decisionsLine = DEFAULT_COMPACTION_INSTRUCTIONS.indexOf("Key decisions");
|
||||
expect(taskLine).toBeLessThan(decisionsLine);
|
||||
expect(taskLine).toBeGreaterThan(-1);
|
||||
});
|
||||
|
||||
it("mentions TASKS.md for task ledger continuity", () => {
|
||||
expect(DEFAULT_COMPACTION_INSTRUCTIONS).toContain("TASKS.md");
|
||||
});
|
||||
|
||||
it("includes de-prioritization guidance", () => {
|
||||
expect(DEFAULT_COMPACTION_INSTRUCTIONS).toContain("De-prioritize");
|
||||
expect(DEFAULT_COMPACTION_INSTRUCTIONS).toContain("casual conversation");
|
||||
expect(DEFAULT_COMPACTION_INSTRUCTIONS).toContain("completed tasks");
|
||||
});
|
||||
|
||||
it("mentions exact values needed to resume work", () => {
|
||||
expect(DEFAULT_COMPACTION_INSTRUCTIONS).toContain("file paths");
|
||||
expect(DEFAULT_COMPACTION_INSTRUCTIONS).toContain("URLs");
|
||||
expect(DEFAULT_COMPACTION_INSTRUCTIONS).toContain("IDs");
|
||||
});
|
||||
|
||||
it("includes tool state preservation", () => {
|
||||
expect(DEFAULT_COMPACTION_INSTRUCTIONS).toContain("Tool state");
|
||||
expect(DEFAULT_COMPACTION_INSTRUCTIONS).toContain("browser sessions");
|
||||
});
|
||||
});
|
||||
|
||||
describe("compaction instructions merging", () => {
|
||||
it("custom instructions are appended to defaults", () => {
|
||||
const customInstructions = "Also remember to include user preferences.";
|
||||
const merged = `${DEFAULT_COMPACTION_INSTRUCTIONS}\n\n${customInstructions}`;
|
||||
|
||||
// Defaults come first
|
||||
expect(merged.indexOf("When summarizing")).toBeLessThan(merged.indexOf(customInstructions));
|
||||
// Custom instructions are present
|
||||
expect(merged).toContain(customInstructions);
|
||||
// Defaults are not lost
|
||||
expect(merged).toContain("active or in-progress tasks");
|
||||
});
|
||||
|
||||
it("when no custom instructions, defaults are used alone", () => {
|
||||
// Simulate the compaction path where customInstructions is undefined
|
||||
const resolve = (custom?: string) =>
|
||||
custom ? `${DEFAULT_COMPACTION_INSTRUCTIONS}\n\n${custom}` : DEFAULT_COMPACTION_INSTRUCTIONS;
|
||||
|
||||
const result = resolve(undefined);
|
||||
expect(result).toBe(DEFAULT_COMPACTION_INSTRUCTIONS);
|
||||
expect(result).not.toContain("\n\nundefined");
|
||||
});
|
||||
});
|
||||
@@ -0,0 +1,62 @@
|
||||
import { describe, expect, it } from "vitest";
|
||||
import { buildAgentSystemPrompt } from "../system-prompt.js";
|
||||
|
||||
describe("Task Ledger section", () => {
|
||||
it("includes the Task Ledger section in full prompt mode", () => {
|
||||
const prompt = buildAgentSystemPrompt({
|
||||
workspaceDir: "/tmp/openclaw",
|
||||
});
|
||||
|
||||
expect(prompt).toContain("## Task Ledger (TASKS.md)");
|
||||
});
|
||||
|
||||
it("describes the task format with required fields", () => {
|
||||
const prompt = buildAgentSystemPrompt({
|
||||
workspaceDir: "/tmp/openclaw",
|
||||
});
|
||||
|
||||
expect(prompt).toContain("**Status:**");
|
||||
expect(prompt).toContain("**Started:**");
|
||||
expect(prompt).toContain("**Updated:**");
|
||||
expect(prompt).toContain("**Current Step:**");
|
||||
});
|
||||
|
||||
it("mentions stale task archival by sleep cycle", () => {
|
||||
const prompt = buildAgentSystemPrompt({
|
||||
workspaceDir: "/tmp/openclaw",
|
||||
});
|
||||
|
||||
expect(prompt).toContain("sleep cycle");
|
||||
expect(prompt).toContain(">24h");
|
||||
});
|
||||
|
||||
it("omits the section in minimal (subagent) prompt mode", () => {
|
||||
const prompt = buildAgentSystemPrompt({
|
||||
workspaceDir: "/tmp/openclaw",
|
||||
promptMode: "minimal",
|
||||
});
|
||||
|
||||
expect(prompt).not.toContain("## Task Ledger (TASKS.md)");
|
||||
});
|
||||
|
||||
it("omits the section in none prompt mode", () => {
|
||||
const prompt = buildAgentSystemPrompt({
|
||||
workspaceDir: "/tmp/openclaw",
|
||||
promptMode: "none",
|
||||
});
|
||||
|
||||
expect(prompt).not.toContain("## Task Ledger (TASKS.md)");
|
||||
});
|
||||
});
|
||||
|
||||
describe("Post-Compaction Recovery", () => {
|
||||
it("does NOT include a static recovery section (handled by framework injection)", () => {
|
||||
const prompt = buildAgentSystemPrompt({
|
||||
workspaceDir: "/tmp/openclaw",
|
||||
});
|
||||
|
||||
// Recovery instructions are injected dynamically via post-compaction-recovery.ts,
|
||||
// not baked into the system prompt (avoids wasting tokens on every turn).
|
||||
expect(prompt).not.toContain("## Post-Compaction Recovery");
|
||||
});
|
||||
});
|
||||
@@ -625,6 +625,38 @@ export function buildAgentSystemPrompt(params: {
|
||||
);
|
||||
}
|
||||
|
||||
// Task Ledger instructions (skip for subagent/none modes)
|
||||
if (!isMinimal) {
|
||||
lines.push(
|
||||
"## Task Ledger (TASKS.md)",
|
||||
"Maintain a TASKS.md file in the workspace root to track active work across compaction events.",
|
||||
"Update it whenever you start, progress, or complete a task. Format:",
|
||||
"",
|
||||
"```markdown",
|
||||
"# Active Tasks",
|
||||
"",
|
||||
"## TASK-001: <short title>",
|
||||
"- **Status:** in_progress | awaiting_input | blocked | done",
|
||||
"- **Started:** YYYY-MM-DD HH:MM",
|
||||
"- **Updated:** YYYY-MM-DD HH:MM",
|
||||
"- **Details:** What this task is about",
|
||||
"- **Current Step:** What you're doing right now",
|
||||
"- **Blocked On:** (if applicable) What's preventing progress",
|
||||
"",
|
||||
"# Completed",
|
||||
"<!-- Move done tasks here with completion date -->",
|
||||
"```",
|
||||
"",
|
||||
"Rules:",
|
||||
"- Create TASKS.md on first task if it doesn't exist.",
|
||||
"- Update **Updated** timestamp and **Current Step** as you make progress.",
|
||||
"- Move tasks to Completed when done; include completion date.",
|
||||
"- Keep IDs sequential (TASK-001, TASK-002, etc.).",
|
||||
"- Stale tasks (>24h with no update) may be auto-archived by the sleep cycle.",
|
||||
"",
|
||||
);
|
||||
}
|
||||
|
||||
// Skip heartbeats for subagent/none modes
|
||||
if (!isMinimal) {
|
||||
lines.push(
|
||||
|
||||
79
src/agents/workspace.tasks-bootstrap.test.ts
Normal file
79
src/agents/workspace.tasks-bootstrap.test.ts
Normal file
@@ -0,0 +1,79 @@
|
||||
import { describe, expect, it } from "vitest";
|
||||
import { makeTempWorkspace, writeWorkspaceFile } from "../test-helpers/workspace.js";
|
||||
import {
|
||||
DEFAULT_TASKS_FILENAME,
|
||||
filterBootstrapFilesForSession,
|
||||
loadWorkspaceBootstrapFiles,
|
||||
} from "./workspace.js";
|
||||
|
||||
describe("TASKS.md bootstrap", () => {
|
||||
it("DEFAULT_TASKS_FILENAME equals TASKS.md", () => {
|
||||
expect(DEFAULT_TASKS_FILENAME).toBe("TASKS.md");
|
||||
});
|
||||
|
||||
it("loadWorkspaceBootstrapFiles includes TASKS.md entry", async () => {
|
||||
const tempDir = await makeTempWorkspace("openclaw-tasks-");
|
||||
|
||||
const files = await loadWorkspaceBootstrapFiles(tempDir);
|
||||
const tasksEntry = files.find((f) => f.name === DEFAULT_TASKS_FILENAME);
|
||||
|
||||
expect(tasksEntry).toBeDefined();
|
||||
});
|
||||
|
||||
it("loads TASKS.md content when the file exists", async () => {
|
||||
const tempDir = await makeTempWorkspace("openclaw-tasks-");
|
||||
await writeWorkspaceFile({ dir: tempDir, name: "TASKS.md", content: "- [ ] finish tests" });
|
||||
|
||||
const files = await loadWorkspaceBootstrapFiles(tempDir);
|
||||
const tasksEntry = files.find((f) => f.name === DEFAULT_TASKS_FILENAME);
|
||||
|
||||
expect(tasksEntry).toBeDefined();
|
||||
expect(tasksEntry!.missing).toBe(false);
|
||||
expect(tasksEntry!.content).toBe("- [ ] finish tests");
|
||||
});
|
||||
|
||||
it("marks TASKS.md as missing (not error) when the file does not exist", async () => {
|
||||
const tempDir = await makeTempWorkspace("openclaw-tasks-");
|
||||
|
||||
const files = await loadWorkspaceBootstrapFiles(tempDir);
|
||||
const tasksEntry = files.find((f) => f.name === DEFAULT_TASKS_FILENAME);
|
||||
|
||||
expect(tasksEntry).toBeDefined();
|
||||
expect(tasksEntry!.missing).toBe(true);
|
||||
expect(tasksEntry!.content).toBeUndefined();
|
||||
});
|
||||
|
||||
it("TASKS.md is in SUBAGENT_BOOTSTRAP_ALLOWLIST (kept for subagent sessions)", () => {
|
||||
const files = [
|
||||
{
|
||||
name: DEFAULT_TASKS_FILENAME as const,
|
||||
path: "/tmp/TASKS.md",
|
||||
missing: false,
|
||||
content: "tasks",
|
||||
},
|
||||
{ name: "SOUL.md" as const, path: "/tmp/SOUL.md", missing: false, content: "soul" },
|
||||
];
|
||||
|
||||
const filtered = filterBootstrapFilesForSession(files, "agent:main:subagent:test-123");
|
||||
|
||||
const tasksKept = filtered.find((f) => f.name === DEFAULT_TASKS_FILENAME);
|
||||
expect(tasksKept).toBeDefined();
|
||||
});
|
||||
|
||||
it("filterBootstrapFilesForSession drops non-allowlisted files for subagent sessions", () => {
|
||||
const files = [
|
||||
{
|
||||
name: DEFAULT_TASKS_FILENAME as const,
|
||||
path: "/tmp/TASKS.md",
|
||||
missing: false,
|
||||
content: "tasks",
|
||||
},
|
||||
{ name: "SOUL.md" as const, path: "/tmp/SOUL.md", missing: false, content: "soul" },
|
||||
];
|
||||
|
||||
const filtered = filterBootstrapFilesForSession(files, "agent:main:subagent:test-123");
|
||||
|
||||
const soulKept = filtered.find((f) => f.name === "SOUL.md");
|
||||
expect(soulKept).toBeUndefined();
|
||||
});
|
||||
});
|
||||
@@ -28,6 +28,7 @@ export const DEFAULT_USER_FILENAME = "USER.md";
|
||||
export const DEFAULT_HEARTBEAT_FILENAME = "HEARTBEAT.md";
|
||||
export const DEFAULT_BOOTSTRAP_FILENAME = "BOOTSTRAP.md";
|
||||
export const DEFAULT_MEMORY_FILENAME = "MEMORY.md";
|
||||
export const DEFAULT_TASKS_FILENAME = "TASKS.md";
|
||||
export const DEFAULT_MEMORY_ALT_FILENAME = "memory.md";
|
||||
const WORKSPACE_STATE_DIRNAME = ".openclaw";
|
||||
const WORKSPACE_STATE_FILENAME = "workspace-state.json";
|
||||
@@ -87,6 +88,7 @@ export type WorkspaceBootstrapFileName =
|
||||
| typeof DEFAULT_HEARTBEAT_FILENAME
|
||||
| typeof DEFAULT_BOOTSTRAP_FILENAME
|
||||
| typeof DEFAULT_MEMORY_FILENAME
|
||||
| typeof DEFAULT_TASKS_FILENAME
|
||||
| typeof DEFAULT_MEMORY_ALT_FILENAME;
|
||||
|
||||
export type WorkspaceBootstrapFile = {
|
||||
@@ -444,6 +446,10 @@ export async function loadWorkspaceBootstrapFiles(dir: string): Promise<Workspac
|
||||
name: DEFAULT_BOOTSTRAP_FILENAME,
|
||||
filePath: path.join(resolvedDir, DEFAULT_BOOTSTRAP_FILENAME),
|
||||
},
|
||||
{
|
||||
name: DEFAULT_TASKS_FILENAME,
|
||||
filePath: path.join(resolvedDir, DEFAULT_TASKS_FILENAME),
|
||||
},
|
||||
];
|
||||
|
||||
entries.push(...(await resolveMemoryBootstrapEntries(resolvedDir)));
|
||||
@@ -465,7 +471,11 @@ export async function loadWorkspaceBootstrapFiles(dir: string): Promise<Workspac
|
||||
return result;
|
||||
}
|
||||
|
||||
const MINIMAL_BOOTSTRAP_ALLOWLIST = new Set([DEFAULT_AGENTS_FILENAME, DEFAULT_TOOLS_FILENAME]);
|
||||
const SUBAGENT_BOOTSTRAP_ALLOWLIST = new Set([
|
||||
DEFAULT_AGENTS_FILENAME,
|
||||
DEFAULT_TOOLS_FILENAME,
|
||||
DEFAULT_TASKS_FILENAME,
|
||||
]);
|
||||
|
||||
export function filterBootstrapFilesForSession(
|
||||
files: WorkspaceBootstrapFile[],
|
||||
@@ -474,7 +484,7 @@ export function filterBootstrapFilesForSession(
|
||||
if (!sessionKey || (!isSubagentSessionKey(sessionKey) && !isCronSessionKey(sessionKey))) {
|
||||
return files;
|
||||
}
|
||||
return files.filter((file) => MINIMAL_BOOTSTRAP_ALLOWLIST.has(file.name));
|
||||
return files.filter((file) => SUBAGENT_BOOTSTRAP_ALLOWLIST.has(file.name));
|
||||
}
|
||||
|
||||
export async function loadExtraBootstrapFiles(
|
||||
|
||||
@@ -23,6 +23,7 @@ import {
|
||||
resolveMemoryFlushSettings,
|
||||
shouldRunMemoryFlush,
|
||||
} from "./memory-flush.js";
|
||||
import { markNeedsPostCompactionRecovery } from "./post-compaction-recovery.js";
|
||||
import { incrementCompactionCount } from "./session-updates.js";
|
||||
|
||||
export async function runMemoryFlushIfNeeded(params: {
|
||||
@@ -179,6 +180,16 @@ export async function runMemoryFlushIfNeeded(params: {
|
||||
if (typeof nextCount === "number") {
|
||||
memoryFlushCompactionCount = nextCount;
|
||||
}
|
||||
// P3: Mark session for post-compaction recovery on the next turn.
|
||||
// This path handles flush-triggered compaction (memory flush forces a compact).
|
||||
// The main path in agent-runner.ts handles SDK auto-compaction.
|
||||
// These are mutually exclusive; setting true is idempotent.
|
||||
await markNeedsPostCompactionRecovery({
|
||||
sessionEntry: activeSessionEntry,
|
||||
sessionStore: activeSessionStore,
|
||||
sessionKey: params.sessionKey,
|
||||
storePath: params.storePath,
|
||||
});
|
||||
}
|
||||
if (params.storePath && params.sessionKey) {
|
||||
try {
|
||||
|
||||
@@ -0,0 +1,62 @@
|
||||
import { describe, expect, it } from "vitest";
|
||||
import {
|
||||
DEFAULT_MEMORY_FLUSH_PROMPT,
|
||||
DEFAULT_MEMORY_FLUSH_SYSTEM_PROMPT,
|
||||
resolveMemoryFlushSettings,
|
||||
} from "./memory-flush.js";
|
||||
|
||||
describe("memory flush task checkpoint", () => {
|
||||
describe("DEFAULT_MEMORY_FLUSH_PROMPT", () => {
|
||||
it("includes task state extraction language", () => {
|
||||
expect(DEFAULT_MEMORY_FLUSH_PROMPT).toContain("active task");
|
||||
expect(DEFAULT_MEMORY_FLUSH_PROMPT).toContain("task name");
|
||||
expect(DEFAULT_MEMORY_FLUSH_PROMPT).toContain("current step");
|
||||
expect(DEFAULT_MEMORY_FLUSH_PROMPT).toContain("pending actions");
|
||||
});
|
||||
|
||||
it("instructs to use memory_store with core category and importance 1.0", () => {
|
||||
expect(DEFAULT_MEMORY_FLUSH_PROMPT).toContain("memory_store");
|
||||
expect(DEFAULT_MEMORY_FLUSH_PROMPT).toContain("category 'core'");
|
||||
expect(DEFAULT_MEMORY_FLUSH_PROMPT).toContain("importance 1.0");
|
||||
});
|
||||
});
|
||||
|
||||
describe("DEFAULT_MEMORY_FLUSH_SYSTEM_PROMPT", () => {
|
||||
it("includes CRITICAL instruction about active tasks", () => {
|
||||
expect(DEFAULT_MEMORY_FLUSH_SYSTEM_PROMPT).toContain("CRITICAL");
|
||||
expect(DEFAULT_MEMORY_FLUSH_SYSTEM_PROMPT).toContain("active task");
|
||||
});
|
||||
|
||||
it("instructs to save task state with core category", () => {
|
||||
expect(DEFAULT_MEMORY_FLUSH_SYSTEM_PROMPT).toContain("memory_store");
|
||||
expect(DEFAULT_MEMORY_FLUSH_SYSTEM_PROMPT).toContain("category='core'");
|
||||
expect(DEFAULT_MEMORY_FLUSH_SYSTEM_PROMPT).toContain("importance=1.0");
|
||||
});
|
||||
|
||||
it("mentions task continuity after compaction", () => {
|
||||
expect(DEFAULT_MEMORY_FLUSH_SYSTEM_PROMPT).toContain("task continuity after compaction");
|
||||
});
|
||||
});
|
||||
|
||||
describe("resolveMemoryFlushSettings", () => {
|
||||
it("returns prompts containing task-related keywords by default", () => {
|
||||
const settings = resolveMemoryFlushSettings();
|
||||
expect(settings).not.toBeNull();
|
||||
expect(settings?.prompt).toContain("active task");
|
||||
expect(settings?.prompt).toContain("memory_store");
|
||||
expect(settings?.systemPrompt).toContain("CRITICAL");
|
||||
expect(settings?.systemPrompt).toContain("task continuity");
|
||||
});
|
||||
|
||||
it("preserves task checkpoint language alongside existing content", () => {
|
||||
const settings = resolveMemoryFlushSettings();
|
||||
expect(settings).not.toBeNull();
|
||||
// Original content still present
|
||||
expect(settings?.prompt).toContain("Pre-compaction memory flush");
|
||||
expect(settings?.prompt).toContain("durable memories");
|
||||
// New task checkpoint content also present
|
||||
expect(settings?.prompt).toContain("current step");
|
||||
expect(settings?.prompt).toContain("pending actions");
|
||||
});
|
||||
});
|
||||
});
|
||||
@@ -36,6 +36,7 @@ import { appendUsageLine, formatResponseUsageLine } from "./agent-runner-utils.j
|
||||
import { createAudioAsVoiceBuffer, createBlockReplyPipeline } from "./block-reply-pipeline.js";
|
||||
import { resolveBlockStreamingCoalescing } from "./block-streaming.js";
|
||||
import { createFollowupRunner } from "./followup-runner.js";
|
||||
import { markNeedsPostCompactionRecovery } from "./post-compaction-recovery.js";
|
||||
import { enqueueFollowupRun, type FollowupRun, type QueueSettings } from "./queue.js";
|
||||
import { createReplyToModeFilterForChannel, resolveReplyToMode } from "./reply-threading.js";
|
||||
import { incrementRunCompactionCount, persistRunSessionUsage } from "./session-run-accounting.js";
|
||||
@@ -499,6 +500,16 @@ export async function runReplyAgent(params: {
|
||||
lastCallUsage: runResult.meta.agentMeta?.lastCallUsage,
|
||||
contextTokensUsed,
|
||||
});
|
||||
// P3: Mark session for post-compaction recovery on the next turn.
|
||||
// This path handles SDK auto-compaction (during the agent run itself).
|
||||
// The memory-flush path in agent-runner-memory.ts handles flush-triggered compaction.
|
||||
// These are mutually exclusive for a given compaction event; setting true is idempotent.
|
||||
await markNeedsPostCompactionRecovery({
|
||||
sessionEntry: activeSessionEntry,
|
||||
sessionStore: activeSessionStore,
|
||||
sessionKey,
|
||||
storePath,
|
||||
});
|
||||
if (verboseEnabled) {
|
||||
const suffix = typeof count === "number" ? ` (count ${count})` : "";
|
||||
finalPayloads = [{ text: `🧹 Auto-compaction complete${suffix}.` }, ...finalPayloads];
|
||||
|
||||
@@ -41,6 +41,10 @@ import { runReplyAgent } from "./agent-runner.js";
|
||||
import { applySessionHints } from "./body.js";
|
||||
import { buildGroupChatContext, buildGroupIntro } from "./groups.js";
|
||||
import { buildInboundMetaSystemPrompt, buildInboundUserContextPrefix } from "./inbound-meta.js";
|
||||
import {
|
||||
clearPostCompactionRecovery,
|
||||
prependPostCompactionRecovery,
|
||||
} from "./post-compaction-recovery.js";
|
||||
import { resolveQueueSettings } from "./queue.js";
|
||||
import { routeReply } from "./route-reply.js";
|
||||
import { BARE_SESSION_RESET_PROMPT } from "./session-reset-prompt.js";
|
||||
@@ -256,6 +260,18 @@ export async function runPreparedReply(
|
||||
isNewSession,
|
||||
prefixedBodyBase,
|
||||
});
|
||||
// P3: Prepend post-compaction recovery instructions if the previous turn
|
||||
// triggered auto-compaction. This ensures the agent recalls task state from
|
||||
// memory before responding to the user's next message.
|
||||
prefixedBodyBase = prependPostCompactionRecovery(prefixedBodyBase, sessionEntry);
|
||||
if (sessionEntry?.needsPostCompactionRecovery) {
|
||||
await clearPostCompactionRecovery({
|
||||
sessionEntry,
|
||||
sessionStore,
|
||||
sessionKey,
|
||||
storePath,
|
||||
});
|
||||
}
|
||||
prefixedBodyBase = appendUntrustedContext(prefixedBodyBase, sessionCtx.UntrustedContext);
|
||||
const threadStarterBody = ctx.ThreadStarterBody?.trim();
|
||||
const threadHistoryBody = ctx.ThreadHistoryBody?.trim();
|
||||
|
||||
@@ -12,12 +12,14 @@ export const DEFAULT_MEMORY_FLUSH_PROMPT = [
|
||||
"Pre-compaction memory flush.",
|
||||
"Store durable memories now (use memory/YYYY-MM-DD.md; create memory/ if needed).",
|
||||
"IMPORTANT: If the file already exists, APPEND new content only and do not overwrite existing entries.",
|
||||
"If there is an active task in progress, save its state: task name, current step, pending actions, and any critical variables. Use memory_store with category 'core' and importance 1.0 for active task state.",
|
||||
`If nothing to store, reply with ${SILENT_REPLY_TOKEN}.`,
|
||||
].join(" ");
|
||||
|
||||
export const DEFAULT_MEMORY_FLUSH_SYSTEM_PROMPT = [
|
||||
"Pre-compaction memory flush turn.",
|
||||
"The session is near auto-compaction; capture durable memories to disk.",
|
||||
"CRITICAL: If there is an active task being worked on, you MUST save its current state (task name, step, pending actions, key variables) to memory_store with category='core' and importance=1.0. This ensures task continuity after compaction.",
|
||||
`You may reply, but usually ${SILENT_REPLY_TOKEN} is correct.`,
|
||||
].join(" ");
|
||||
|
||||
|
||||
102
src/auto-reply/reply/post-compaction-recovery.test.ts
Normal file
102
src/auto-reply/reply/post-compaction-recovery.test.ts
Normal file
@@ -0,0 +1,102 @@
|
||||
import { describe, expect, it } from "vitest";
|
||||
import {
|
||||
getPostCompactionRecoveryPrompt,
|
||||
POST_COMPACTION_RECOVERY_PROMPT,
|
||||
prependPostCompactionRecovery,
|
||||
} from "./post-compaction-recovery.js";
|
||||
|
||||
describe("post-compaction-recovery", () => {
|
||||
describe("POST_COMPACTION_RECOVERY_PROMPT", () => {
|
||||
it("is defined and non-empty", () => {
|
||||
expect(POST_COMPACTION_RECOVERY_PROMPT).toBeTruthy();
|
||||
expect(POST_COMPACTION_RECOVERY_PROMPT.length).toBeGreaterThan(0);
|
||||
});
|
||||
|
||||
it("stays under 200 tokens (rough estimate: <800 chars)", () => {
|
||||
// A rough heuristic: 1 token ≈ 4 chars. 200 tokens ≈ 800 chars.
|
||||
expect(POST_COMPACTION_RECOVERY_PROMPT.length).toBeLessThan(800);
|
||||
});
|
||||
|
||||
it("includes memory_recall instruction", () => {
|
||||
expect(POST_COMPACTION_RECOVERY_PROMPT).toContain("memory_recall");
|
||||
});
|
||||
|
||||
it("includes TASKS.md instruction", () => {
|
||||
expect(POST_COMPACTION_RECOVERY_PROMPT).toContain("TASKS.md");
|
||||
});
|
||||
|
||||
it("includes Context Reset notification template", () => {
|
||||
expect(POST_COMPACTION_RECOVERY_PROMPT).toContain("Context Reset");
|
||||
});
|
||||
});
|
||||
|
||||
describe("getPostCompactionRecoveryPrompt", () => {
|
||||
it("returns null when entry is undefined", () => {
|
||||
expect(getPostCompactionRecoveryPrompt(undefined)).toBeNull();
|
||||
});
|
||||
|
||||
it("returns null when needsPostCompactionRecovery is false", () => {
|
||||
const entry = {
|
||||
sessionId: "test",
|
||||
updatedAt: Date.now(),
|
||||
needsPostCompactionRecovery: false,
|
||||
};
|
||||
expect(getPostCompactionRecoveryPrompt(entry)).toBeNull();
|
||||
});
|
||||
|
||||
it("returns null when needsPostCompactionRecovery is not set", () => {
|
||||
const entry = { sessionId: "test", updatedAt: Date.now() };
|
||||
expect(getPostCompactionRecoveryPrompt(entry)).toBeNull();
|
||||
});
|
||||
|
||||
it("returns the recovery prompt when needsPostCompactionRecovery is true", () => {
|
||||
const entry = {
|
||||
sessionId: "test",
|
||||
updatedAt: Date.now(),
|
||||
needsPostCompactionRecovery: true,
|
||||
};
|
||||
expect(getPostCompactionRecoveryPrompt(entry)).toBe(POST_COMPACTION_RECOVERY_PROMPT);
|
||||
});
|
||||
});
|
||||
|
||||
describe("prependPostCompactionRecovery", () => {
|
||||
it("returns original body when no recovery needed", () => {
|
||||
const body = "Hello, how are you?";
|
||||
expect(prependPostCompactionRecovery(body, undefined)).toBe(body);
|
||||
});
|
||||
|
||||
it("returns original body when flag is false", () => {
|
||||
const body = "Hello, how are you?";
|
||||
const entry = {
|
||||
sessionId: "test",
|
||||
updatedAt: Date.now(),
|
||||
needsPostCompactionRecovery: false,
|
||||
};
|
||||
expect(prependPostCompactionRecovery(body, entry)).toBe(body);
|
||||
});
|
||||
|
||||
it("prepends recovery prompt when flag is true", () => {
|
||||
const body = "Hello, how are you?";
|
||||
const entry = {
|
||||
sessionId: "test",
|
||||
updatedAt: Date.now(),
|
||||
needsPostCompactionRecovery: true,
|
||||
};
|
||||
const result = prependPostCompactionRecovery(body, entry);
|
||||
expect(result).toContain(POST_COMPACTION_RECOVERY_PROMPT);
|
||||
expect(result).toContain(body);
|
||||
expect(result.indexOf(POST_COMPACTION_RECOVERY_PROMPT)).toBeLessThan(result.indexOf(body));
|
||||
});
|
||||
|
||||
it("separates recovery prompt from body with double newline", () => {
|
||||
const body = "test message";
|
||||
const entry = {
|
||||
sessionId: "test",
|
||||
updatedAt: Date.now(),
|
||||
needsPostCompactionRecovery: true,
|
||||
};
|
||||
const result = prependPostCompactionRecovery(body, entry);
|
||||
expect(result).toBe(`${POST_COMPACTION_RECOVERY_PROMPT}\n\n${body}`);
|
||||
});
|
||||
});
|
||||
});
|
||||
103
src/auto-reply/reply/post-compaction-recovery.ts
Normal file
103
src/auto-reply/reply/post-compaction-recovery.ts
Normal file
@@ -0,0 +1,103 @@
|
||||
import type { SessionEntry } from "../../config/sessions.js";
|
||||
import { updateSessionStore } from "../../config/sessions.js";
|
||||
|
||||
/**
|
||||
* Post-compaction recovery prompt injected into the next user message after
|
||||
* auto-compaction completes. Instructs the agent to recall task state from
|
||||
* memory and notify the user about the context reset.
|
||||
*
|
||||
* Kept under 200 tokens to minimize context overhead.
|
||||
*/
|
||||
export const POST_COMPACTION_RECOVERY_PROMPT = [
|
||||
"[Post-compaction recovery — mandatory steps]",
|
||||
"Context was just compacted. Before responding, you MUST:",
|
||||
'1. Run memory_recall("active task") to check for saved task state.',
|
||||
"2. Read TASKS.md if it exists in your workspace.",
|
||||
"3. Compare recovered state against the compaction summary above.",
|
||||
'4. Notify the user: "🔄 Context Reset — last task: [X], resuming from step [Y]" (or summarize what you recall).',
|
||||
"Do NOT skip these steps. Proceed with the user's message after recovery.",
|
||||
].join("\n");
|
||||
|
||||
/**
|
||||
* Check whether the session needs post-compaction recovery and return the
|
||||
* recovery prompt if so. Returns `null` when no recovery is needed.
|
||||
*/
|
||||
export function getPostCompactionRecoveryPrompt(entry?: SessionEntry): string | null {
|
||||
if (!entry?.needsPostCompactionRecovery) {
|
||||
return null;
|
||||
}
|
||||
return POST_COMPACTION_RECOVERY_PROMPT;
|
||||
}
|
||||
|
||||
/**
|
||||
* Prepend the post-compaction recovery prompt to the user's message body.
|
||||
* Returns the original body unchanged if no recovery is needed.
|
||||
*/
|
||||
export function prependPostCompactionRecovery(body: string, entry?: SessionEntry): string {
|
||||
const prompt = getPostCompactionRecoveryPrompt(entry);
|
||||
if (!prompt) {
|
||||
return body;
|
||||
}
|
||||
return `${prompt}\n\n${body}`;
|
||||
}
|
||||
|
||||
/**
|
||||
* Set or clear the post-compaction recovery flag on a session.
|
||||
*/
|
||||
async function setPostCompactionRecovery(
|
||||
value: boolean,
|
||||
params: {
|
||||
sessionEntry?: SessionEntry;
|
||||
sessionStore?: Record<string, SessionEntry>;
|
||||
sessionKey?: string;
|
||||
storePath?: string;
|
||||
},
|
||||
): Promise<void> {
|
||||
const { sessionEntry, sessionStore, sessionKey, storePath } = params;
|
||||
if (!sessionStore || !sessionKey) {
|
||||
return;
|
||||
}
|
||||
const entry = sessionStore[sessionKey] ?? sessionEntry;
|
||||
if (!entry) {
|
||||
return;
|
||||
}
|
||||
sessionStore[sessionKey] = {
|
||||
...entry,
|
||||
needsPostCompactionRecovery: value,
|
||||
};
|
||||
if (storePath) {
|
||||
await updateSessionStore(storePath, (store) => {
|
||||
if (store[sessionKey]) {
|
||||
store[sessionKey] = {
|
||||
...store[sessionKey],
|
||||
needsPostCompactionRecovery: value,
|
||||
};
|
||||
}
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Mark a session as needing post-compaction recovery on the next turn.
|
||||
*/
|
||||
export async function markNeedsPostCompactionRecovery(params: {
|
||||
sessionEntry?: SessionEntry;
|
||||
sessionStore?: Record<string, SessionEntry>;
|
||||
sessionKey?: string;
|
||||
storePath?: string;
|
||||
}): Promise<void> {
|
||||
return setPostCompactionRecovery(true, params);
|
||||
}
|
||||
|
||||
/**
|
||||
* Clear the post-compaction recovery flag after recovery instructions have
|
||||
* been injected into the prompt.
|
||||
*/
|
||||
export async function clearPostCompactionRecovery(params: {
|
||||
sessionEntry?: SessionEntry;
|
||||
sessionStore?: Record<string, SessionEntry>;
|
||||
sessionKey?: string;
|
||||
storePath?: string;
|
||||
}): Promise<void> {
|
||||
return setPostCompactionRecovery(false, params);
|
||||
}
|
||||
@@ -82,6 +82,8 @@ export type SessionEntry = {
|
||||
model?: string;
|
||||
contextTokens?: number;
|
||||
compactionCount?: number;
|
||||
/** Set after auto-compaction; cleared after the next turn injects recovery instructions. */
|
||||
needsPostCompactionRecovery?: boolean;
|
||||
memoryFlushAt?: number;
|
||||
memoryFlushCompactionCount?: number;
|
||||
cliSessionIds?: Record<string, string>;
|
||||
|
||||
Reference in New Issue
Block a user