mirror of
https://github.com/openclaw/openclaw.git
synced 2026-05-30 14:56:51 +00:00
Layer 1 — Recall-time filter (task-filter.ts): - New module that reads TASKS.md completed tasks and filters recalled memories that match completed task IDs or keywords - Integrated into auto-recall hook as Feature 3 (after score/dedup filters) - 60-second cache to avoid re-parsing TASKS.md on every message - 29 new tests Layer 2 — Sleep cycle Phase 7 (task-memory cleanup): - New phase cross-references completed tasks with stored memories - LLM classifies each matched memory as 'lasting' (keep) or 'noise' (delete) - Conservative: keeps memories on any doubt or LLM failure - Scans only tasks completed within last 7 days - New searchMemoriesByKeywords() method on neo4j client - 16 new tests Layer 3 — Memory task metadata (taskId field): - Optional taskId field on MemoryNode, StoreMemoryInput, and search results - Auto-tags memories during auto-capture when exactly 1 active task exists - Precise taskId-based filtering at recall time (complements Layer 1) - findMemoriesByTaskId() and clearTaskIdFromMemories() on neo4j client - taskId flows through vector, BM25, and graph search signals + RRF fusion - 20 new tests All 669 memory-neo4j tests pass. Zero regressions in full suite. All changes are backward compatible — existing memories without taskId continue to work. No migration needed.
235 lines
7.8 KiB
TypeScript
235 lines
7.8 KiB
TypeScript
/**
|
|
* Tests for Phase 7: Task-Memory Cleanup in the sleep cycle.
|
|
*
|
|
* Tests the LLM classification function and integration with the sleep cycle.
|
|
*/
|
|
|
|
import { describe, it, expect, vi, beforeEach } from "vitest";
|
|
import type { ExtractionConfig } from "./config.js";
|
|
import { classifyTaskMemory } from "./sleep-cycle.js";
|
|
|
|
// --------------------------------------------------------------------------
|
|
// Mock the LLM client so we don't make real API calls
|
|
// --------------------------------------------------------------------------
|
|
vi.mock("./llm-client.js", () => ({
|
|
callOpenRouter: vi.fn(),
|
|
callOpenRouterStream: vi.fn(),
|
|
isTransientError: vi.fn(() => false),
|
|
}));
|
|
|
|
// Import the mocked function for controlling behavior per test
|
|
import { callOpenRouter } from "./llm-client.js";
|
|
const mockCallOpenRouter = vi.mocked(callOpenRouter);
|
|
|
|
// --------------------------------------------------------------------------
|
|
// Helpers
|
|
// --------------------------------------------------------------------------
|
|
|
|
const baseConfig: ExtractionConfig = {
|
|
enabled: true,
|
|
apiKey: "test-key",
|
|
model: "test-model",
|
|
baseUrl: "http://localhost:8080",
|
|
temperature: 0,
|
|
maxRetries: 0,
|
|
};
|
|
|
|
const disabledConfig: ExtractionConfig = {
|
|
...baseConfig,
|
|
enabled: false,
|
|
};
|
|
|
|
// --------------------------------------------------------------------------
|
|
// classifyTaskMemory()
|
|
// --------------------------------------------------------------------------
|
|
|
|
describe("classifyTaskMemory", () => {
|
|
beforeEach(() => {
|
|
vi.clearAllMocks();
|
|
});
|
|
|
|
it("returns 'noise' for task-specific progress memory", async () => {
|
|
mockCallOpenRouter.mockResolvedValueOnce(
|
|
JSON.stringify({
|
|
classification: "noise",
|
|
reason: "This is task-specific progress tracking",
|
|
}),
|
|
);
|
|
|
|
const result = await classifyTaskMemory(
|
|
"Currently working on TASK-003, step 2: fixing the column alignment in the LinkedIn dashboard",
|
|
"Fix LinkedIn Dashboard tab",
|
|
baseConfig,
|
|
);
|
|
|
|
expect(result).toBe("noise");
|
|
expect(mockCallOpenRouter).toHaveBeenCalledOnce();
|
|
});
|
|
|
|
it("returns 'lasting' for decision/fact memory", async () => {
|
|
mockCallOpenRouter.mockResolvedValueOnce(
|
|
JSON.stringify({
|
|
classification: "lasting",
|
|
reason: "Contains a reusable technical decision",
|
|
}),
|
|
);
|
|
|
|
const result = await classifyTaskMemory(
|
|
"ReActor face swap produces better results than Replicate for video face replacement",
|
|
"Implement face swap pipeline",
|
|
baseConfig,
|
|
);
|
|
|
|
expect(result).toBe("lasting");
|
|
expect(mockCallOpenRouter).toHaveBeenCalledOnce();
|
|
});
|
|
|
|
it("returns 'lasting' when LLM returns null (conservative)", async () => {
|
|
mockCallOpenRouter.mockResolvedValueOnce(null);
|
|
|
|
const result = await classifyTaskMemory("Some ambiguous memory", "Some task", baseConfig);
|
|
|
|
expect(result).toBe("lasting");
|
|
});
|
|
|
|
it("returns 'lasting' when LLM throws (conservative)", async () => {
|
|
mockCallOpenRouter.mockRejectedValueOnce(new Error("network error"));
|
|
|
|
const result = await classifyTaskMemory("Some memory", "Some task", baseConfig);
|
|
|
|
expect(result).toBe("lasting");
|
|
});
|
|
|
|
it("returns 'lasting' when LLM returns malformed JSON", async () => {
|
|
mockCallOpenRouter.mockResolvedValueOnce("not json at all");
|
|
|
|
const result = await classifyTaskMemory("Some memory", "Some task", baseConfig);
|
|
|
|
expect(result).toBe("lasting");
|
|
});
|
|
|
|
it("returns 'lasting' when LLM returns unexpected classification", async () => {
|
|
mockCallOpenRouter.mockResolvedValueOnce(JSON.stringify({ classification: "unknown_value" }));
|
|
|
|
const result = await classifyTaskMemory("Some memory", "Some task", baseConfig);
|
|
|
|
expect(result).toBe("lasting");
|
|
});
|
|
|
|
it("returns 'lasting' when config is disabled", async () => {
|
|
const result = await classifyTaskMemory("Task progress memory", "Some task", disabledConfig);
|
|
|
|
expect(result).toBe("lasting");
|
|
expect(mockCallOpenRouter).not.toHaveBeenCalled();
|
|
});
|
|
|
|
it("passes task title in system prompt", async () => {
|
|
mockCallOpenRouter.mockResolvedValueOnce(JSON.stringify({ classification: "lasting" }));
|
|
|
|
await classifyTaskMemory("Memory text here", "Fix LinkedIn Dashboard tab", baseConfig);
|
|
|
|
expect(mockCallOpenRouter).toHaveBeenCalledOnce();
|
|
const callArgs = mockCallOpenRouter.mock.calls[0];
|
|
const messages = callArgs[1] as Array<{ role: string; content: string }>;
|
|
expect(messages[0].content).toContain("Fix LinkedIn Dashboard tab");
|
|
});
|
|
|
|
it("passes memory text as user message", async () => {
|
|
mockCallOpenRouter.mockResolvedValueOnce(JSON.stringify({ classification: "noise" }));
|
|
|
|
await classifyTaskMemory(
|
|
"Debugging step: checked column B3 alignment",
|
|
"Fix Dashboard",
|
|
baseConfig,
|
|
);
|
|
|
|
const callArgs = mockCallOpenRouter.mock.calls[0];
|
|
const messages = callArgs[1] as Array<{ role: string; content: string }>;
|
|
expect(messages[1].role).toBe("user");
|
|
expect(messages[1].content).toBe("Debugging step: checked column B3 alignment");
|
|
});
|
|
|
|
it("passes abort signal to LLM call", async () => {
|
|
const controller = new AbortController();
|
|
mockCallOpenRouter.mockResolvedValueOnce(JSON.stringify({ classification: "lasting" }));
|
|
|
|
await classifyTaskMemory("Memory text", "Task title", baseConfig, controller.signal);
|
|
|
|
const callArgs = mockCallOpenRouter.mock.calls[0];
|
|
expect(callArgs[2]).toBe(controller.signal);
|
|
});
|
|
});
|
|
|
|
// --------------------------------------------------------------------------
|
|
// Classification examples — verify the prompt produces expected behavior
|
|
// These test that noise vs lasting classification is passed through correctly
|
|
// --------------------------------------------------------------------------
|
|
|
|
describe("classifyTaskMemory classification examples", () => {
|
|
beforeEach(() => {
|
|
vi.clearAllMocks();
|
|
});
|
|
|
|
const noiseExamples = [
|
|
{
|
|
memory: "Currently working on TASK-003, step 2: fixing the column alignment",
|
|
task: "Fix LinkedIn Dashboard tab",
|
|
reason: "task progress update",
|
|
},
|
|
{
|
|
memory: "ACTIVE TASK: TASK-004 — Fix browser port collision. Step: testing port 18807",
|
|
task: "Fix browser port collision",
|
|
reason: "active task checkpoint",
|
|
},
|
|
{
|
|
memory: "Debugging the flight search: Scoot API returned 500, retrying with different dates",
|
|
task: "Book KL↔Singapore flights for India trip",
|
|
reason: "debugging steps",
|
|
},
|
|
];
|
|
|
|
for (const example of noiseExamples) {
|
|
it(`classifies "${example.reason}" as noise`, async () => {
|
|
mockCallOpenRouter.mockResolvedValueOnce(
|
|
JSON.stringify({ classification: "noise", reason: example.reason }),
|
|
);
|
|
|
|
const result = await classifyTaskMemory(example.memory, example.task, baseConfig);
|
|
|
|
expect(result).toBe("noise");
|
|
});
|
|
}
|
|
|
|
const lastingExamples = [
|
|
{
|
|
memory:
|
|
"Port map: 18792 (chrome), 18800 (chetan), 18805 (linkedin), 18806 (tsukhani), 18807 (openclaw)",
|
|
task: "Fix browser port collision",
|
|
reason: "useful reference configuration",
|
|
},
|
|
{
|
|
memory:
|
|
"Dashboard layout: B3:B9 = Total, Accepted, Pending, Not Connected, Follow-ups Sent, Acceptance Rate%, Date",
|
|
task: "Fix LinkedIn Dashboard tab",
|
|
reason: "lasting documentation of layout",
|
|
},
|
|
{
|
|
memory: "ReActor face swap produces better results than Replicate for video face replacement",
|
|
task: "Implement face swap pipeline",
|
|
reason: "tool comparison decision",
|
|
},
|
|
];
|
|
|
|
for (const example of lastingExamples) {
|
|
it(`classifies "${example.reason}" as lasting`, async () => {
|
|
mockCallOpenRouter.mockResolvedValueOnce(
|
|
JSON.stringify({ classification: "lasting", reason: example.reason }),
|
|
);
|
|
|
|
const result = await classifyTaskMemory(example.memory, example.task, baseConfig);
|
|
|
|
expect(result).toBe("lasting");
|
|
});
|
|
}
|
|
});
|