Files
openclaw/extensions/memory-neo4j/index.test.ts
Tarun Sukhani e0e98c2c0d memory-neo4j: purge noise, tighten auto-capture filters, cap sleep cycle dedup
- Add 11 ASSISTANT_NARRATION_PATTERNS to reject play-by-play self-talk
  ("Let me check...", "I'll run...", "Starting...", "Good! The...", etc.)
- Cap Phase 1b semantic dedup to 50 pairs (sorted by similarity desc)
  to prevent sleep cycle timeouts on large memory sets
- Raise user auto-capture importance threshold from 0.3 to 0.5
- Raise assistant auto-capture importance threshold from 0.7 to 0.8
- Raise MIN_WORD_COUNT from 5 to 8 for user attention gate
- Neo4j cleanup: deleted 155 noise entries (394→242 memories),
  recategorized 2 misplaced entries, stripped Slack metadata from 1

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-16 17:56:38 +08:00

755 lines
29 KiB
TypeScript

/**
* Tests for the memory-neo4j plugin entry point.
*
* Covers:
* 1. Attention gates (user and assistant) — re-exported from attention-gate.ts
* 2. Message extraction — extractUserMessages, extractAssistantMessages from message-utils.ts
* 3. Strip wrappers — stripMessageWrappers, stripAssistantWrappers from message-utils.ts
*
* Does NOT test the plugin registration or CLI commands (those require the
* full OpenClaw SDK runtime). Focuses on pure functions and the behavioral
* contracts of the auto-capture pipeline helpers.
*/
import { describe, it, expect } from "vitest";
import { passesAttentionGate, passesAssistantAttentionGate } from "./attention-gate.js";
import {
extractUserMessages,
extractAssistantMessages,
stripMessageWrappers,
stripAssistantWrappers,
} from "./message-utils.js";
// ============================================================================
// Test Helpers
// ============================================================================
/**
 * Build a space-separated sentence consisting of `wordCount` copies of `word`.
 *
 * The result has no leading or trailing space; a `wordCount` of 0 yields "".
 *
 * @param wordCount - How many times `word` appears in the result.
 * @param word - The token to repeat (defaults to "lorem").
 * @returns The repeated words joined by single spaces.
 */
function makeText(wordCount: number, word = "lorem"): string {
  // Allocate one slot per word first, then fill each slot with the same token.
  const slots = Array.from({ length: wordCount });
  const words = slots.map(() => word);
  return words.join(" ");
}
/**
 * Build a filler string by repeating `char` exactly `charCount` times.
 *
 * With the default single-character filler the result is `charCount`
 * characters long; a longer `char` produces a proportionally longer string.
 *
 * @param charCount - Number of repetitions (passed straight to `String.prototype.repeat`).
 * @param char - The string to repeat (defaults to "x").
 * @returns The repeated filler string.
 */
function makeChars(charCount: number, char = "x"): string {
  const filler = char;
  const repeated = filler.repeat(charCount);
  return repeated;
}
// ============================================================================
// passesAttentionGate() — User Attention Gate
// ============================================================================
// Behavioral tests for the user-side attention gate: character-length bounds,
// minimum word count, noise-pattern rejection, injected-context rejection,
// emoji limits, and a set of substantive messages that must be accepted.
describe("passesAttentionGate", () => {
  // -----------------------------------------------------------------------
  // Length bounds
  // -----------------------------------------------------------------------
  describe("length bounds", () => {
    it("should reject messages shorter than 30 characters", () => {
      expect(passesAttentionGate("too short")).toBe(false);
      expect(passesAttentionGate("a".repeat(29))).toBe(false);
      // Boundary case with plenty of words (10), so only the character
      // length (29) can be the reason for rejection.
      const justUnder = "ab cd ef gh ij kl mn op qr st";
      expect(justUnder.length).toBe(29);
      expect(justUnder.split(/\s+/).length).toBeGreaterThanOrEqual(8);
      expect(passesAttentionGate(justUnder)).toBe(false);
    });
    it("should reject messages longer than 2000 characters", () => {
      // Boundary case: 250 × "testing " (8 chars each) = 2000, plus one char = 2001.
      const justOver = "testing ".repeat(250) + "x";
      expect(justOver.length).toBe(2001);
      expect(passesAttentionGate(justOver)).toBe(false);
      // Well over the limit: 300 × "longword" joined by spaces = 2699 chars.
      const longText = makeText(300, "longword");
      expect(longText.length).toBeGreaterThan(2000);
      expect(passesAttentionGate(longText)).toBe(false);
    });
    it("should accept messages at exactly 30 characters with sufficient words", () => {
      // 9 two-letter words + 8 spaces = 26 chars, plus " stu" = 30 chars (10 words).
      const text = "ab cd ef gh ij kl mn op qr stu";
      expect(text.length).toBe(30);
      expect(text.split(/\s+/).length).toBeGreaterThanOrEqual(8);
      expect(passesAttentionGate(text)).toBe(true);
    });
    it("should accept messages at exactly 2000 characters with sufficient words", () => {
      // 249 × "testing " (8 chars) + "testing" (7 chars) = 1999 chars;
      // one extra trailing character brings the total to exactly 2000.
      const base = "testing ".repeat(249) + "testing"; // 249*8 + 7 = 1999
      const text = base + "s"; // 2000 chars
      expect(text.length).toBe(2000);
      expect(passesAttentionGate(text)).toBe(true);
    });
  });
  // -----------------------------------------------------------------------
  // Word count
  // -----------------------------------------------------------------------
  describe("word count", () => {
    it("should reject messages with fewer than 8 words", () => {
      // 7 words, but long enough in chars (> 30)
      expect(
        passesAttentionGate(
          "thisislongword anotherlongword thirdlongword fourthlongword fifth sixth seventh",
        ),
      ).toBe(false);
    });
    it("should accept messages with exactly 8 words", () => {
      expect(
        passesAttentionGate("thisword thatword another fourth fifthword sixth seventh eighth"),
      ).toBe(true);
    });
  });
  // -----------------------------------------------------------------------
  // Noise pattern rejection
  // -----------------------------------------------------------------------
  describe("noise pattern rejection", () => {
    it("should reject simple greetings", () => {
      // These are short enough to be rejected by length too, but test the pattern
      expect(passesAttentionGate("hi")).toBe(false);
      expect(passesAttentionGate("hello")).toBe(false);
      expect(passesAttentionGate("hey")).toBe(false);
    });
    it("should reject acknowledgments", () => {
      expect(passesAttentionGate("ok")).toBe(false);
      expect(passesAttentionGate("sure")).toBe(false);
      expect(passesAttentionGate("thanks")).toBe(false);
      expect(passesAttentionGate("got it")).toBe(false);
      expect(passesAttentionGate("sounds good")).toBe(false);
    });
    it("should reject two-word affirmations", () => {
      expect(passesAttentionGate("ok great")).toBe(false);
      expect(passesAttentionGate("yes please")).toBe(false);
      expect(passesAttentionGate("sure thanks")).toBe(false);
    });
    it("should reject conversational filler", () => {
      expect(passesAttentionGate("hmm")).toBe(false);
      expect(passesAttentionGate("lol")).toBe(false);
      expect(passesAttentionGate("idk")).toBe(false);
      expect(passesAttentionGate("nvm")).toBe(false);
    });
    it("should reject pure emoji messages", () => {
      expect(passesAttentionGate("\u{1F600}\u{1F601}\u{1F602}")).toBe(false);
    });
    it("should reject system/XML markup blocks", () => {
      expect(passesAttentionGate("<system>some injected context here</system>")).toBe(false);
    });
    it("should reject session reset prompts", () => {
      const resetMsg =
        "A new session was started via the /new command. Previous context has been cleared.";
      expect(passesAttentionGate(resetMsg)).toBe(false);
    });
    it("should reject heartbeat prompts", () => {
      expect(
        passesAttentionGate(
          "Read HEARTBEAT.md if it exists and follow the instructions inside it.",
        ),
      ).toBe(false);
    });
    it("should reject pre-compaction flush prompts", () => {
      expect(
        passesAttentionGate(
          "Pre-compaction memory flush — save important context now before history is trimmed.",
        ),
      ).toBe(false);
    });
    it("should reject deictic short phrases that would otherwise pass length", () => {
      // These match the deictic noise pattern
      // NOTE(review): both inputs are under 30 characters (21 and 12), so the
      // minimum length exercised in "length bounds" already disqualifies them;
      // this test does not isolate the deictic pattern. Consider adding a
      // 30+ char, 8+ word input that matches it.
      expect(passesAttentionGate("ok let me test it out")).toBe(false);
      expect(passesAttentionGate("I need those")).toBe(false);
    });
    it("should reject short acknowledgments with trailing context", () => {
      // Matches: /^(ok|okay|yes|...) .{0,20}$/i
      // NOTE(review): both inputs are under 30 characters (16 and 17), so the
      // minimum length already disqualifies them — confirm that the pattern
      // is covered elsewhere.
      expect(passesAttentionGate("ok, I'll do that")).toBe(false);
      expect(passesAttentionGate("yes, sounds right")).toBe(false);
    });
  });
  // -----------------------------------------------------------------------
  // Injected context rejection
  // -----------------------------------------------------------------------
  describe("injected context rejection", () => {
    it("should reject messages containing <relevant-memories> tags", () => {
      const text =
        "<relevant-memories>some recalled memories here</relevant-memories> " +
        makeText(10, "actual");
      expect(passesAttentionGate(text)).toBe(false);
    });
    it("should reject messages containing <core-memory-refresh> tags", () => {
      const text =
        "<core-memory-refresh>refresh data</core-memory-refresh> " + makeText(10, "actual");
      expect(passesAttentionGate(text)).toBe(false);
    });
  });
  // -----------------------------------------------------------------------
  // Excessive emoji rejection
  // -----------------------------------------------------------------------
  describe("excessive emoji rejection", () => {
    it("should reject messages with more than 3 emoji (Unicode range)", () => {
      // 4 emoji in the U+1F300-U+1F9FF range
      const text = makeText(10, "word") + " \u{1F600}\u{1F601}\u{1F602}\u{1F603}";
      expect(passesAttentionGate(text)).toBe(false);
    });
    it("should accept messages with 3 or fewer emoji", () => {
      const text = makeText(10, "testing") + " \u{1F600}\u{1F601}\u{1F602}";
      expect(passesAttentionGate(text)).toBe(true);
    });
  });
  // -----------------------------------------------------------------------
  // Substantive messages that should pass
  // -----------------------------------------------------------------------
  describe("substantive messages", () => {
    it("should accept a clear factual statement", () => {
      expect(passesAttentionGate("I prefer dark mode for all my code editors and terminals")).toBe(
        true,
      );
    });
    it("should accept a preference statement", () => {
      expect(
        passesAttentionGate(
          "My favorite programming language is TypeScript because of its type system",
        ),
      ).toBe(true);
    });
    it("should accept a decision statement", () => {
      expect(
        passesAttentionGate(
          "We decided to use Neo4j for the knowledge graph instead of PostgreSQL",
        ),
      ).toBe(true);
    });
    it("should accept a multi-sentence message", () => {
      expect(
        passesAttentionGate(
          "The deployment pipeline uses GitHub Actions. It builds and tests on every push to main.",
        ),
      ).toBe(true);
    });
    it("should handle leading/trailing whitespace via trimming", () => {
      expect(
        passesAttentionGate(" I prefer using vitest for testing my TypeScript projects "),
      ).toBe(true);
    });
  });
});
// ============================================================================
// passesAssistantAttentionGate() — Assistant Attention Gate
// ============================================================================
// Behavioral tests for the assistant-side attention gate: length bounds,
// minimum word count, code-heavy and tool-output rejection, injected-context
// rejection, shared noise/emoji filters, and accepted substantive replies.
describe("passesAssistantAttentionGate", () => {
  // -----------------------------------------------------------------------
  // Length bounds (stricter than user)
  // -----------------------------------------------------------------------
  describe("length bounds", () => {
    it("should reject messages shorter than 30 characters", () => {
      const tooShort = "short msg";
      expect(passesAssistantAttentionGate(tooShort)).toBe(false);
    });
    it("should reject messages longer than 1000 characters", () => {
      const oversized = makeText(200, "wordword");
      expect(oversized.length).toBeGreaterThan(1000);
      expect(passesAssistantAttentionGate(oversized)).toBe(false);
    });
  });
  // -----------------------------------------------------------------------
  // Word count (higher threshold — 10 words minimum)
  // -----------------------------------------------------------------------
  describe("word count", () => {
    it("should reject messages with fewer than 10 words", () => {
      // Nine five-letter words separated by spaces: long enough in characters,
      // one word short of the minimum.
      const sentence = "alpha bravo charm delta eerie found ghost horse india";
      const wordTotal = sentence.split(/\s+/).length;
      expect(wordTotal).toBe(9);
      expect(sentence.length).toBeGreaterThan(30);
      expect(passesAssistantAttentionGate(sentence)).toBe(false);
    });
    it("should accept messages with exactly 10 words", () => {
      const sentence = "alpha bravo charm delta eerie found ghost horse india julep";
      const wordTotal = sentence.split(/\s+/).length;
      expect(wordTotal).toBe(10);
      expect(sentence.length).toBeGreaterThan(30);
      expect(passesAssistantAttentionGate(sentence)).toBe(true);
    });
  });
  // -----------------------------------------------------------------------
  // Code-heavy message rejection (> 50% fenced code)
  // -----------------------------------------------------------------------
  describe("code-heavy rejection", () => {
    it("should reject messages that are more than 50% fenced code blocks", () => {
      // One short sentence of prose followed by a fenced block that makes up
      // the majority of the message.
      const prose = "Here is some explanation for the code below that follows.\n";
      const fencedCode =
        "```typescript\n" +
        "function example() {\n" +
        " const x = 1;\n" +
        " const y = 2;\n" +
        " return x + y;\n" +
        "}\n" +
        "function another() {\n" +
        " const a = 3;\n" +
        " return a * 2;\n" +
        "}\n" +
        "```";
      expect(passesAssistantAttentionGate(prose + fencedCode)).toBe(false);
    });
    it("should accept messages with less than 50% code", () => {
      // Three sentences of prose followed by a single-line fenced snippet.
      const prose =
        "The configuration requires setting up the environment variables correctly. " +
        "You need to set NEO4J_URI, NEO4J_USER, and NEO4J_PASSWORD. " +
        "Make sure the password is at least 8 characters long for security. ";
      const fencedCode = "```\nNEO4J_URI=bolt://localhost:7687\n```";
      expect(passesAssistantAttentionGate(prose + fencedCode)).toBe(true);
    });
  });
  // -----------------------------------------------------------------------
  // Tool output rejection
  // -----------------------------------------------------------------------
  describe("tool output rejection", () => {
    it("should reject messages containing <tool_result> tags", () => {
      const lead = "Here is the result of the search query across all the relevant documents ";
      const toolBlock = "<tool_result>some result data here</tool_result>";
      expect(passesAssistantAttentionGate(lead + toolBlock)).toBe(false);
    });
    it("should reject messages containing <tool_use> tags", () => {
      const lead = "I will use this tool to help answer your question about the system setup ";
      const toolBlock = "<tool_use>tool invocation here</tool_use>";
      expect(passesAssistantAttentionGate(lead + toolBlock)).toBe(false);
    });
    it("should reject messages containing <function_call> tags", () => {
      const lead = "Calling the function to retrieve the relevant data from the database now ";
      const toolBlock = "<function_call>fn call here</function_call>";
      expect(passesAssistantAttentionGate(lead + toolBlock)).toBe(false);
    });
  });
  // -----------------------------------------------------------------------
  // Injected context rejection
  // -----------------------------------------------------------------------
  describe("injected context rejection", () => {
    it("should reject messages with <relevant-memories> tags", () => {
      const injected = "<relevant-memories>cached recall data</relevant-memories> ";
      const reply = makeText(15, "answer");
      expect(passesAssistantAttentionGate(injected + reply)).toBe(false);
    });
    it("should reject messages with <core-memory-refresh> tags", () => {
      const injected = "<core-memory-refresh>identity refresh</core-memory-refresh> ";
      const reply = makeText(15, "answer");
      expect(passesAssistantAttentionGate(injected + reply)).toBe(false);
    });
  });
  // -----------------------------------------------------------------------
  // Noise patterns and emoji (shared with user gate)
  // -----------------------------------------------------------------------
  describe("noise patterns", () => {
    it("should reject greeting noise", () => {
      const greeting = "hello";
      expect(passesAssistantAttentionGate(greeting)).toBe(false);
    });
    it("should reject excessive emoji", () => {
      // Fifteen ordinary words followed by four emoji.
      const fourEmoji = " \u{1F600}\u{1F601}\u{1F602}\u{1F603}";
      const message = makeText(15, "answer") + fourEmoji;
      expect(passesAssistantAttentionGate(message)).toBe(false);
    });
  });
  // -----------------------------------------------------------------------
  // Substantive assistant messages that should pass
  // -----------------------------------------------------------------------
  describe("substantive assistant messages", () => {
    it("should accept a clear explanatory response", () => {
      const explanation =
        "The Neo4j database uses a property graph model where nodes represent entities and edges represent relationships between them.";
      expect(passesAssistantAttentionGate(explanation)).toBe(true);
    });
    it("should accept a recommendation response", () => {
      const recommendation =
        "Based on your requirements, I recommend using vitest for unit testing because it has native TypeScript support and fast execution times.";
      expect(passesAssistantAttentionGate(recommendation)).toBe(true);
    });
  });
});
// ============================================================================
// extractUserMessages()
// ============================================================================
// Tests for extractUserMessages: supported content shapes, role filtering,
// the short-message filter, wrapper stripping, and tolerance of malformed entries.
describe("extractUserMessages", () => {
  it("should extract text from string content format", () => {
    const body = "This is a substantive user message for testing";
    const conversation = [{ role: "user", content: body }];
    expect(extractUserMessages(conversation)).toEqual([
      "This is a substantive user message for testing",
    ]);
  });
  it("should extract text from content block array format", () => {
    const conversation = [
      {
        role: "user",
        content: [{ type: "text", text: "This is a substantive user message from a block array" }],
      },
    ];
    expect(extractUserMessages(conversation)).toEqual([
      "This is a substantive user message from a block array",
    ]);
  });
  it("should extract multiple text blocks from a single message", () => {
    // Two text blocks with a non-text block between them.
    const blocks = [
      { type: "text", text: "First text block with enough characters" },
      { type: "image", url: "http://example.com/img.png" },
      { type: "text", text: "Second text block with enough characters" },
    ];
    const extracted = extractUserMessages([{ role: "user", content: blocks }]);
    expect(extracted).toHaveLength(2);
    expect(extracted[0]).toBe("First text block with enough characters");
    expect(extracted[1]).toBe("Second text block with enough characters");
  });
  it("should ignore non-user messages", () => {
    const conversation = [
      { role: "assistant", content: "I am the assistant response message here" },
      { role: "system", content: "This is the system prompt configuration text" },
      { role: "user", content: "This is the actual user message text here" },
    ];
    expect(extractUserMessages(conversation)).toEqual([
      "This is the actual user message text here",
    ]);
  });
  it("should filter out messages shorter than 10 characters after stripping", () => {
    const conversation = [
      { role: "user", content: "short" },
      { role: "user", content: "This is a long enough message to pass the filter" },
    ];
    const extracted = extractUserMessages(conversation);
    expect(extracted).toHaveLength(1);
    expect(extracted[0]).toBe("This is a long enough message to pass the filter");
  });
  it("should strip Telegram wrappers before returning", () => {
    const wrapped =
      "[Telegram @user123 in group] The actual user message is right here\n[message_id: 456]";
    const extracted = extractUserMessages([{ role: "user", content: wrapped }]);
    expect(extracted).toEqual(["The actual user message is right here"]);
  });
  it("should strip Slack wrappers before returning", () => {
    const wrapped =
      "[Slack workspace #channel @user] The actual user message text goes here\n[slack message id: abc123]";
    const extracted = extractUserMessages([{ role: "user", content: wrapped }]);
    expect(extracted).toEqual(["The actual user message text goes here"]);
  });
  it("should strip injected <relevant-memories> context", () => {
    const wrapped =
      "<relevant-memories>recalled: user likes dark mode</relevant-memories> What editor do you recommend for me?";
    const extracted = extractUserMessages([{ role: "user", content: wrapped }]);
    expect(extracted).toEqual(["What editor do you recommend for me?"]);
  });
  it("should handle null and non-object entries gracefully", () => {
    // Only the final entry is a well-formed user message.
    const mixedEntries: unknown[] = [
      null,
      undefined,
      42,
      "string",
      { role: "user", content: "This is a valid message with enough text" },
    ];
    expect(extractUserMessages(mixedEntries)).toEqual([
      "This is a valid message with enough text",
    ]);
  });
  it("should handle empty messages array", () => {
    const extracted = extractUserMessages([]);
    expect(extracted).toEqual([]);
  });
  it("should ignore content blocks that are not type 'text'", () => {
    const nonTextBlocks = [
      { type: "image", url: "http://example.com/photo.jpg" },
      { type: "audio", data: "base64data..." },
    ];
    const extracted = extractUserMessages([{ role: "user", content: nonTextBlocks }]);
    expect(extracted).toEqual([]);
  });
});
// ============================================================================
// extractAssistantMessages()
// ============================================================================
// Tests for extractAssistantMessages: supported content shapes, role filtering,
// the short-message filter, removal of tool/thinking/code-output blocks, and
// tolerance of malformed entries.
describe("extractAssistantMessages", () => {
  it("should extract text from string content format", () => {
    const conversation = [
      { role: "assistant", content: "Here is a substantive assistant response text" },
    ];
    expect(extractAssistantMessages(conversation)).toEqual([
      "Here is a substantive assistant response text",
    ]);
  });
  it("should extract text from content block array format", () => {
    const blocks = [
      { type: "text", text: "The assistant provides an answer to your question here" },
    ];
    const extracted = extractAssistantMessages([{ role: "assistant", content: blocks }]);
    expect(extracted).toEqual(["The assistant provides an answer to your question here"]);
  });
  it("should ignore non-assistant messages", () => {
    const conversation = [
      { role: "user", content: "This is a user message that should be ignored" },
      { role: "assistant", content: "This is the assistant response message here" },
    ];
    expect(extractAssistantMessages(conversation)).toEqual([
      "This is the assistant response message here",
    ]);
  });
  it("should filter out messages shorter than 10 characters after stripping", () => {
    const conversation = [
      { role: "assistant", content: "short" },
      { role: "assistant", content: "This is a long enough assistant response message" },
    ];
    const extracted = extractAssistantMessages(conversation);
    expect(extracted).toHaveLength(1);
    expect(extracted[0]).toBe("This is a long enough assistant response message");
  });
  it("should strip tool-use blocks from assistant messages", () => {
    const raw =
      "<tool_use>search function call parameters</tool_use>Here is the answer to your question about configuration";
    const extracted = extractAssistantMessages([{ role: "assistant", content: raw }]);
    expect(extracted).toEqual(["Here is the answer to your question about configuration"]);
  });
  it("should strip tool_result blocks from assistant messages", () => {
    const raw =
      "The query returned: <tool_result>raw database output here</tool_result> which means the config is correct and working.";
    const extracted = extractAssistantMessages([{ role: "assistant", content: raw }]);
    expect(extracted).toEqual(["The query returned: which means the config is correct and working."]);
  });
  it("should strip thinking blocks from assistant messages", () => {
    const raw =
      "<thinking>I need to figure out the best approach here</thinking>The best approach is to use a hybrid search combining vector and BM25 signals.";
    const extracted = extractAssistantMessages([{ role: "assistant", content: raw }]);
    expect(extracted).toEqual([
      "The best approach is to use a hybrid search combining vector and BM25 signals.",
    ]);
  });
  it("should strip code_output blocks from assistant messages", () => {
    const raw =
      "I ran the code: <code_output>stdout: success</code_output> and it completed without any errors at all.";
    const extracted = extractAssistantMessages([{ role: "assistant", content: raw }]);
    expect(extracted).toEqual(["I ran the code: and it completed without any errors at all."]);
  });
  it("should handle null and non-object entries gracefully", () => {
    // Only the final entry is a well-formed assistant message.
    const mixedEntries: unknown[] = [
      null,
      undefined,
      { role: "assistant", content: "This is a valid assistant response text" },
    ];
    expect(extractAssistantMessages(mixedEntries)).toEqual([
      "This is a valid assistant response text",
    ]);
  });
  it("should handle empty messages array", () => {
    const extracted = extractAssistantMessages([]);
    expect(extracted).toEqual([]);
  });
});
// ============================================================================
// stripMessageWrappers()
// ============================================================================
// Tests for stripMessageWrappers: removal of injected-context tags, channel
// wrappers (Telegram/Slack), attachment and exec preambles, and plain trimming.
describe("stripMessageWrappers", () => {
  it("should strip <relevant-memories> tags and content", () => {
    const cleaned = stripMessageWrappers(
      "<relevant-memories>user likes dark mode</relevant-memories> What editor should I use?",
    );
    expect(cleaned).toBe("What editor should I use?");
  });
  it("should strip <core-memory-refresh> tags and content", () => {
    const cleaned = stripMessageWrappers(
      "<core-memory-refresh>identity: Tarun</core-memory-refresh> How do I configure this?",
    );
    expect(cleaned).toBe("How do I configure this?");
  });
  it("should strip <system> tags and content", () => {
    const cleaned = stripMessageWrappers(
      "<system>You are a helpful assistant.</system> What is the weather?",
    );
    expect(cleaned).toBe("What is the weather?");
  });
  it("should strip <file> attachment tags", () => {
    const cleaned = stripMessageWrappers(
      '<file name="doc.pdf">base64content</file> Summarize this document for me.',
    );
    expect(cleaned).toBe("Summarize this document for me.");
  });
  it("should strip Telegram wrapper and message_id", () => {
    const cleaned = stripMessageWrappers(
      "[Telegram @john in private] Please remember my preference\n[message_id: 12345]",
    );
    expect(cleaned).toBe("Please remember my preference");
  });
  it("should strip Slack wrapper and slack message id", () => {
    const cleaned = stripMessageWrappers(
      "[Slack acme-corp #general @alice] Please deploy the latest build\n[slack message id: ts-123]",
    );
    expect(cleaned).toBe("Please deploy the latest build");
  });
  it("should strip media attachment preamble", () => {
    const cleaned = stripMessageWrappers(
      "[media attached: image/jpeg]\nTo send an image reply with...\n[Telegram @user in private] What is this picture?",
    );
    expect(cleaned).toBe("What is this picture?");
  });
  it("should strip System exec output blocks before Telegram wrapper", () => {
    const cleaned = stripMessageWrappers(
      "System: [2024-01-01] exec completed\n[Telegram @user in private] What happened with the deploy?",
    );
    expect(cleaned).toBe("What happened with the deploy?");
  });
  it("should handle multiple wrappers in one message", () => {
    // Memory tag, system tag, and Telegram wrapper all precede the real text.
    const wrapped =
      "<relevant-memories>recalled facts</relevant-memories> <system>You are helpful.</system> [Telegram @user in group] What is up?";
    expect(stripMessageWrappers(wrapped)).toBe("What is up?");
  });
  it("should return trimmed text when no wrappers are present", () => {
    const padded = " Just a plain message ";
    expect(stripMessageWrappers(padded)).toBe("Just a plain message");
  });
});
// ============================================================================
// stripAssistantWrappers()
// ============================================================================
// Tests for stripAssistantWrappers: removal of tool, function-call, thinking,
// and code-output blocks from assistant text, plus plain trimming.
describe("stripAssistantWrappers", () => {
  it("should strip <tool_use> blocks", () => {
    const cleaned = stripAssistantWrappers("<tool_use>call search</tool_use>The answer is 42.");
    expect(cleaned).toBe("The answer is 42.");
  });
  it("should strip <tool_result> blocks", () => {
    const cleaned = stripAssistantWrappers(
      "Result: <tool_result>raw output</tool_result> processed successfully.",
    );
    // Only a single space remains between the surrounding words once the
    // block is removed.
    expect(cleaned).toBe("Result: processed successfully.");
  });
  it("should strip <function_call> blocks", () => {
    const cleaned = stripAssistantWrappers(
      "<function_call>fn(args)</function_call>Done with the operation.",
    );
    expect(cleaned).toBe("Done with the operation.");
  });
  it("should strip <thinking> blocks", () => {
    const cleaned = stripAssistantWrappers(
      "<thinking>Let me consider...</thinking>I recommend using vitest.",
    );
    expect(cleaned).toBe("I recommend using vitest.");
  });
  it("should strip <antThinking> blocks", () => {
    const cleaned = stripAssistantWrappers(
      "<antThinking>analyzing the request</antThinking>Here is the analysis.",
    );
    expect(cleaned).toBe("Here is the analysis.");
  });
  it("should strip <code_output> blocks", () => {
    const cleaned = stripAssistantWrappers(
      "Output: <code_output>success</code_output> everything worked.",
    );
    // Only a single space remains between the surrounding words once the
    // block is removed.
    expect(cleaned).toBe("Output: everything worked.");
  });
  it("should strip multiple wrapper types in one message", () => {
    const cleaned = stripAssistantWrappers(
      "<thinking>hmm</thinking><tool_use>search</tool_use>The final answer is here.<tool_result>data</tool_result>",
    );
    expect(cleaned).toBe("The final answer is here.");
  });
  it("should return trimmed text when no wrappers are present", () => {
    const padded = " Plain assistant text ";
    expect(stripAssistantWrappers(padded)).toBe("Plain assistant text");
  });
});