Files
openclaw/extensions/memory-neo4j/sleep-cycle.task-memory.test.ts
Tarun Sukhani 6ff248fd4e memory-neo4j: task-aware memory filtering (3 layers)
Layer 1 — Recall-time filter (task-filter.ts):
- New module that reads TASKS.md completed tasks and filters recalled
  memories that match completed task IDs or keywords
- Integrated into auto-recall hook as Feature 3 (after score/dedup filters)
- 60-second cache to avoid re-parsing TASKS.md on every message
- 29 new tests

Layer 2 — Sleep cycle Phase 7 (task-memory cleanup):
- New phase cross-references completed tasks with stored memories
- LLM classifies each matched memory as 'lasting' (keep) or 'noise' (delete)
- Conservative: keeps memories on any doubt or LLM failure
- Scans only tasks completed within last 7 days
- New searchMemoriesByKeywords() method on neo4j client
- 16 new tests

Layer 3 — Memory task metadata (taskId field):
- Optional taskId field on MemoryNode, StoreMemoryInput, and search results
- Auto-tags memories during auto-capture when exactly 1 active task exists
- Precise taskId-based filtering at recall time (complements Layer 1)
- findMemoriesByTaskId() and clearTaskIdFromMemories() on neo4j client
- taskId flows through vector, BM25, and graph search signals + RRF fusion
- 20 new tests

All 669 memory-neo4j tests pass. Zero regressions in full suite.
All changes are backward compatible — existing memories without taskId
continue to work. No migration needed.
2026-02-16 17:56:39 +08:00

235 lines
7.8 KiB
TypeScript

/**
* Tests for Phase 7: Task-Memory Cleanup in the sleep cycle.
*
* Exercises classifyTaskMemory(), the LLM classification function used by that
* phase, with the LLM client mocked. Sleep-cycle integration is not covered here.
*/
import { describe, it, expect, vi, beforeEach } from "vitest";
import type { ExtractionConfig } from "./config.js";
import { classifyTaskMemory } from "./sleep-cycle.js";
// --------------------------------------------------------------------------
// Mock the LLM client so we don't make real API calls
// --------------------------------------------------------------------------
vi.mock("./llm-client.js", () => {
  // isTransientError always answers false in this file.
  const neverTransient = vi.fn(() => false);
  return {
    callOpenRouter: vi.fn(),
    callOpenRouterStream: vi.fn(),
    isTransientError: neverTransient,
  };
});
// Typed handle on the mocked callOpenRouter so each test can script its reply
// and inspect the arguments it was called with.
import { callOpenRouter } from "./llm-client.js";
const mockCallOpenRouter = vi.mocked(callOpenRouter);
// --------------------------------------------------------------------------
// Helpers
// --------------------------------------------------------------------------
/**
 * Extraction settings with the LLM enabled. The LLM client is mocked in this
 * file, so the key, model and URL are placeholders that are never contacted.
 */
const baseConfig: ExtractionConfig = {
  enabled: true,
  apiKey: "test-key",
  model: "test-model",
  baseUrl: "http://localhost:8080",
  temperature: 0,
  maxRetries: 0,
};
/** Same settings as baseConfig, but with `enabled` switched off. */
const disabledConfig: ExtractionConfig = { ...baseConfig, enabled: false };
// --------------------------------------------------------------------------
// classifyTaskMemory()
// --------------------------------------------------------------------------
describe("classifyTaskMemory", () => {
  type ChatMessage = { role: string; content: string };

  /** Queues `payload`, serialized as JSON, as the next mocked LLM reply. */
  const replyWithJson = (payload: Record<string, string>): void => {
    mockCallOpenRouter.mockResolvedValueOnce(JSON.stringify(payload));
  };

  /** Positional arguments of the first recorded callOpenRouter invocation. */
  const firstCallArgs = () => mockCallOpenRouter.mock.calls[0];

  /** Second argument of the first recorded call, viewed as a chat-message list. */
  const firstCallMessages = () => firstCallArgs()[1] as ChatMessage[];

  beforeEach(() => {
    // Drop recorded calls so per-test call-count assertions start from zero.
    vi.clearAllMocks();
  });

  // ---- classification pass-through -------------------------------------

  it("returns 'noise' for task-specific progress memory", async () => {
    replyWithJson({
      classification: "noise",
      reason: "This is task-specific progress tracking",
    });

    const verdict = await classifyTaskMemory(
      "Currently working on TASK-003, step 2: fixing the column alignment in the LinkedIn dashboard",
      "Fix LinkedIn Dashboard tab",
      baseConfig,
    );

    expect(verdict).toBe("noise");
    expect(mockCallOpenRouter).toHaveBeenCalledOnce();
  });

  it("returns 'lasting' for decision/fact memory", async () => {
    replyWithJson({
      classification: "lasting",
      reason: "Contains a reusable technical decision",
    });

    const verdict = await classifyTaskMemory(
      "ReActor face swap produces better results than Replicate for video face replacement",
      "Implement face swap pipeline",
      baseConfig,
    );

    expect(verdict).toBe("lasting");
    expect(mockCallOpenRouter).toHaveBeenCalledOnce();
  });

  // ---- conservative fallbacks: anything doubtful resolves to 'lasting' ----

  it("returns 'lasting' when LLM returns null (conservative)", async () => {
    mockCallOpenRouter.mockResolvedValueOnce(null);

    await expect(
      classifyTaskMemory("Some ambiguous memory", "Some task", baseConfig),
    ).resolves.toBe("lasting");
  });

  it("returns 'lasting' when LLM throws (conservative)", async () => {
    mockCallOpenRouter.mockRejectedValueOnce(new Error("network error"));

    await expect(classifyTaskMemory("Some memory", "Some task", baseConfig)).resolves.toBe(
      "lasting",
    );
  });

  it("returns 'lasting' when LLM returns malformed JSON", async () => {
    mockCallOpenRouter.mockResolvedValueOnce("not json at all");

    await expect(classifyTaskMemory("Some memory", "Some task", baseConfig)).resolves.toBe(
      "lasting",
    );
  });

  it("returns 'lasting' when LLM returns unexpected classification", async () => {
    replyWithJson({ classification: "unknown_value" });

    await expect(classifyTaskMemory("Some memory", "Some task", baseConfig)).resolves.toBe(
      "lasting",
    );
  });

  it("returns 'lasting' when config is disabled", async () => {
    await expect(
      classifyTaskMemory("Task progress memory", "Some task", disabledConfig),
    ).resolves.toBe("lasting");
    // With extraction disabled the LLM must not be consulted at all.
    expect(mockCallOpenRouter).not.toHaveBeenCalled();
  });

  // ---- arguments forwarded to the LLM client ----------------------------

  it("passes task title in system prompt", async () => {
    replyWithJson({ classification: "lasting" });

    await classifyTaskMemory("Memory text here", "Fix LinkedIn Dashboard tab", baseConfig);

    expect(mockCallOpenRouter).toHaveBeenCalledOnce();
    const [firstMessage] = firstCallMessages();
    expect(firstMessage.content).toContain("Fix LinkedIn Dashboard tab");
  });

  it("passes memory text as user message", async () => {
    replyWithJson({ classification: "noise" });

    await classifyTaskMemory(
      "Debugging step: checked column B3 alignment",
      "Fix Dashboard",
      baseConfig,
    );

    const [, secondMessage] = firstCallMessages();
    expect(secondMessage.role).toBe("user");
    expect(secondMessage.content).toBe("Debugging step: checked column B3 alignment");
  });

  it("passes abort signal to LLM call", async () => {
    const { signal } = new AbortController();
    replyWithJson({ classification: "lasting" });

    await classifyTaskMemory("Memory text", "Task title", baseConfig, signal);

    // The signal is expected as the third positional argument, by identity.
    expect(firstCallArgs()[2]).toBe(signal);
  });
});
// --------------------------------------------------------------------------
// Classification examples — representative "noise" vs "lasting" memories.
// The LLM is mocked, so these tests only verify that the mocked classification
// is passed through correctly; they do not evaluate the prompt itself.
// --------------------------------------------------------------------------
describe("classifyTaskMemory classification examples", () => {
  type Verdict = "noise" | "lasting";

  interface Example {
    memory: string;
    task: string;
    reason: string;
  }

  beforeEach(() => {
    vi.clearAllMocks();
  });

  // One table drives both groups. Each entry pairs the classification the
  // mocked LLM will report with the sample memories expected to receive it.
  const examplesByVerdict: Array<[Verdict, Example[]]> = [
    [
      "noise",
      [
        {
          memory: "Currently working on TASK-003, step 2: fixing the column alignment",
          task: "Fix LinkedIn Dashboard tab",
          reason: "task progress update",
        },
        {
          memory: "ACTIVE TASK: TASK-004 — Fix browser port collision. Step: testing port 18807",
          task: "Fix browser port collision",
          reason: "active task checkpoint",
        },
        {
          memory:
            "Debugging the flight search: Scoot API returned 500, retrying with different dates",
          task: "Book KL↔Singapore flights for India trip",
          reason: "debugging steps",
        },
      ],
    ],
    [
      "lasting",
      [
        {
          memory:
            "Port map: 18792 (chrome), 18800 (chetan), 18805 (linkedin), 18806 (tsukhani), 18807 (openclaw)",
          task: "Fix browser port collision",
          reason: "useful reference configuration",
        },
        {
          memory:
            "Dashboard layout: B3:B9 = Total, Accepted, Pending, Not Connected, Follow-ups Sent, Acceptance Rate%, Date",
          task: "Fix LinkedIn Dashboard tab",
          reason: "lasting documentation of layout",
        },
        {
          memory:
            "ReActor face swap produces better results than Replicate for video face replacement",
          task: "Implement face swap pipeline",
          reason: "tool comparison decision",
        },
      ],
    ],
  ];

  for (const [verdict, examples] of examplesByVerdict) {
    for (const { memory, task, reason } of examples) {
      it(`classifies "${reason}" as ${verdict}`, async () => {
        // The mocked LLM reports `verdict`; the function must return it unchanged.
        mockCallOpenRouter.mockResolvedValueOnce(
          JSON.stringify({ classification: verdict, reason }),
        );

        await expect(classifyTaskMemory(memory, task, baseConfig)).resolves.toBe(verdict);
      });
    }
  }
});