mirror of
https://github.com/openclaw/openclaw.git
synced 2026-05-21 10:04:58 +00:00
memory-neo4j: purge noise, tighten auto-capture filters, cap sleep cycle dedup
- Add 11 ASSISTANT_NARRATION_PATTERNS to reject play-by-play self-talk
("Let me check...", "I'll run...", "Starting...", "Good! The...", etc.)
- Cap Phase 1b semantic dedup to 50 pairs (sorted by similarity desc)
to prevent sleep cycle timeouts on large memory sets
- Raise user auto-capture importance threshold from 0.3 to 0.5
- Raise assistant auto-capture importance threshold from 0.7 to 0.8
- Raise MIN_WORD_COUNT from 5 to 8 for user attention gate
- Neo4j cleanup: deleted 155 noise entries (394→242 memories),
recategorized 2 misplaced entries, stripped Slack metadata from 1
Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
@@ -29,6 +29,11 @@ const NOISE_PATTERNS = [
|
||||
// --- Session reset prompts (from /new and /reset commands) ---
|
||||
/^A new session was started via/i,
|
||||
|
||||
// --- Raw chat messages with channel metadata (autocaptured noise) ---
|
||||
/\[slack message id:/i,
|
||||
/\[message_id:/i,
|
||||
/\[telegram message id:/i,
|
||||
|
||||
// --- System infrastructure messages (never user-generated) ---
|
||||
// Heartbeat prompts
|
||||
/Read HEARTBEAT\.md if it exists/i,
|
||||
@@ -51,7 +56,7 @@ const MAX_CAPTURE_CHARS = 2000;
|
||||
const MIN_CAPTURE_CHARS = 30;
|
||||
|
||||
/** Minimum word count — short contextual phrases lack standalone meaning. */
|
||||
const MIN_WORD_COUNT = 5;
|
||||
const MIN_WORD_COUNT = 8;
|
||||
|
||||
export function passesAttentionGate(text: string): boolean {
|
||||
const trimmed = text.trim();
|
||||
@@ -100,6 +105,34 @@ const MAX_ASSISTANT_CAPTURE_CHARS = 1000;
|
||||
/** Minimum word count for assistant messages — higher than user. */
|
||||
const MIN_ASSISTANT_WORD_COUNT = 10;
|
||||
|
||||
/**
|
||||
* Patterns that reject assistant self-narration — play-by-play commentary
|
||||
* that reads like thinking out loud rather than a conclusion or fact.
|
||||
* These are the single biggest source of noise in auto-captured assistant memories.
|
||||
*/
|
||||
const ASSISTANT_NARRATION_PATTERNS = [
|
||||
// "Let me ..." / "Now let me ..." / "Ok, let me ..." action narration
|
||||
/^(ok[,.]?\s+)?(now\s+)?let me\s+(check|look|see|try|run|start|test|read|update|verify|fix|search|process|create|build|set up|examine|investigate|query|fetch|pull|scan|clean|install|download|configure)/i,
|
||||
// "I'll ..." / "I will ..." action narration
|
||||
/^I('ll| will)\s+(check|look|see|try|run|start|test|read|update|verify|fix|search|process|create|build|set up|examine|investigate|query|fetch|pull|scan|clean|install|download|configure|execute|help|handle)/i,
|
||||
// "Starting ..." / "Running ..." / "Processing ..." status updates
|
||||
/^(starting|running|processing|checking|fetching|scanning|building|installing|downloading|configuring|executing|loading|updating)\s/i,
|
||||
// "Good!" / "Great!" / "Perfect!" as opener followed by narration
|
||||
/^(good|great|perfect|nice|excellent|awesome|done)[!.]?\s+(i |the |now |let |we |that )/i,
|
||||
// Progress narration: "Now I have..." / "Now I can see..." / "Now we need..." / "Now the ..."
|
||||
/^now\s+(i\s+(have|can|need|see|understand)|we\s+(have|can|need)|the\s)/i,
|
||||
// Step narration: "Step 1:" / "**Step 1:**"
|
||||
/^\*?\*?step\s+\d/i,
|
||||
// Narration of what was found/done: "Found it." / "Found the ..." / "I see — ..."
|
||||
/^(found it|found the|i see\s*[—–-])/i,
|
||||
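// Sub-agent task descriptions (workflow narration), detected by a leading, optionally bracketed weekday + YYYY-MM-DD date stamp (e.g. "[Mon 2026-01-05 ...")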
|
||||
/^\[?(mon|tue|wed|thu|fri|sat|sun)\s+\d{4}-\d{2}-\d{2}/i,
|
||||
// Context compaction self-announcements
|
||||
/^🔄\s*\*?\*?context reset/i,
|
||||
// Filename slug generation prompts (internal tool use)
|
||||
/^based on this conversation,?\s*generate a short/i,
|
||||
];
|
||||
|
||||
export function passesAssistantAttentionGate(text: string): boolean {
|
||||
const trimmed = text.trim();
|
||||
|
||||
@@ -144,6 +177,11 @@ export function passesAssistantAttentionGate(text: string): boolean {
|
||||
return false;
|
||||
}
|
||||
|
||||
// Assistant-specific narration patterns (play-by-play self-talk)
|
||||
if (ASSISTANT_NARRATION_PATTERNS.some((r) => r.test(trimmed))) {
|
||||
return false;
|
||||
}
|
||||
|
||||
// Excessive emoji (likely reaction, not substance)
|
||||
const emojiCount = (
|
||||
trimmed.match(/[\u{1F300}-\u{1F9FF}\u{2600}-\u{26FF}\u{2700}-\u{27BF}\u{1FA00}-\u{1FAFF}]/gu) ||
|
||||
|
||||
@@ -146,14 +146,26 @@ describe("passesAttentionGate", () => {
|
||||
});
|
||||
|
||||
it("should accept messages with specific information/preferences", () => {
|
||||
expect(passesAttentionGate("I prefer using TypeScript over JavaScript")).toBe(true);
|
||||
expect(passesAttentionGate("My meeting with John is on Thursday")).toBe(true);
|
||||
expect(passesAttentionGate("The project deadline was moved to March")).toBe(true);
|
||||
expect(
|
||||
passesAttentionGate("I strongly prefer using TypeScript over JavaScript for all projects"),
|
||||
).toBe(true);
|
||||
expect(
|
||||
passesAttentionGate("My important meeting with John is scheduled for Thursday afternoon"),
|
||||
).toBe(true);
|
||||
expect(
|
||||
passesAttentionGate("The project deadline was moved to March due to client feedback"),
|
||||
).toBe(true);
|
||||
});
|
||||
|
||||
it("should accept actionable requests with context", () => {
|
||||
expect(passesAttentionGate("Let's limit the wa-group-monitoring to business hours")).toBe(true);
|
||||
expect(passesAttentionGate("Can you check the error logs on the production server")).toBe(true);
|
||||
expect(
|
||||
passesAttentionGate("Let's limit the wa-group-monitoring cron job to business hours only"),
|
||||
).toBe(true);
|
||||
expect(
|
||||
passesAttentionGate(
|
||||
"Can you check the error logs on the production server for recent failures",
|
||||
),
|
||||
).toBe(true);
|
||||
});
|
||||
});
|
||||
|
||||
@@ -1334,6 +1346,84 @@ describe("passesAssistantAttentionGate", () => {
|
||||
expect(passesAssistantAttentionGate("ok")).toBe(false);
|
||||
expect(passesAssistantAttentionGate("sounds good")).toBe(false);
|
||||
});
|
||||
|
||||
it("should reject 'Let me...' action narration", () => {
|
||||
expect(
|
||||
passesAssistantAttentionGate(
|
||||
"Let me check the error logs on the production server for recent failures and report back.",
|
||||
),
|
||||
).toBe(false);
|
||||
expect(
|
||||
passesAssistantAttentionGate(
|
||||
"Now let me update the dashboard and send the Slack report with today's results:",
|
||||
),
|
||||
).toBe(false);
|
||||
expect(
|
||||
passesAssistantAttentionGate(
|
||||
"Let me run the LinkedIn parallel outreach job and start by setting up the search term rotation.",
|
||||
),
|
||||
).toBe(false);
|
||||
});
|
||||
|
||||
it("should reject 'I'll...' action narration", () => {
|
||||
expect(
|
||||
passesAssistantAttentionGate(
|
||||
"I'll run the email labeler to classify any unread, unlabeled emails right now.",
|
||||
),
|
||||
).toBe(false);
|
||||
expect(
|
||||
passesAssistantAttentionGate(
|
||||
"I'll check for newly accepted LinkedIn connections and update the tracker spreadsheet.",
|
||||
),
|
||||
).toBe(false);
|
||||
});
|
||||
|
||||
it("should reject 'Starting/Running/Processing...' status updates", () => {
|
||||
expect(
|
||||
passesAssistantAttentionGate(
|
||||
"Starting LinkedIn outreach for Training category using profile linkedin-3 with isolated browser.",
|
||||
),
|
||||
).toBe(false);
|
||||
expect(
|
||||
passesAssistantAttentionGate(
|
||||
"Processing through extraction steadily doing eight at a time against local Qwen model.",
|
||||
),
|
||||
).toBe(false);
|
||||
});
|
||||
|
||||
it("should reject 'Good!/Perfect!' opener narration", () => {
|
||||
expect(
|
||||
passesAssistantAttentionGate(
|
||||
"Good! I can see the search results. I've identified several 2nd-degree prospects to connect with.",
|
||||
),
|
||||
).toBe(false);
|
||||
expect(
|
||||
passesAssistantAttentionGate(
|
||||
"Perfect! The connection dialog appeared. I'll click Add a note to add the personalized message.",
|
||||
),
|
||||
).toBe(false);
|
||||
});
|
||||
|
||||
it("should reject context compaction announcements", () => {
|
||||
expect(
|
||||
passesAssistantAttentionGate(
|
||||
"\u{1F504} **Context Reset** \u{2014} My memory was just compacted. Last thing I remember: setting up Flux 2.",
|
||||
),
|
||||
).toBe(false);
|
||||
});
|
||||
|
||||
it("should still accept substantive assistant conclusions", () => {
|
||||
expect(
|
||||
passesAssistantAttentionGate(
|
||||
"The memory-neo4j plugin uses confidence-weighted RRF for search result fusion and a 3-signal hybrid search combining HNSW, BM25, and graph traversal.",
|
||||
),
|
||||
).toBe(true);
|
||||
expect(
|
||||
passesAssistantAttentionGate(
|
||||
"Whisper wins accuracy across all tests while SenseVoice wins speed at seventeen to thirty-four times faster processing.",
|
||||
),
|
||||
).toBe(true);
|
||||
});
|
||||
});
|
||||
|
||||
// ============================================================================
|
||||
|
||||
@@ -806,6 +806,23 @@ export async function runSleepCycle(
|
||||
}
|
||||
}
|
||||
|
||||
// Cap the number of LLM-checked pairs to prevent sleep cycle timeouts.
|
||||
// Sort by similarity descending so higher-similarity pairs (more likely
|
||||
// to be duplicates) are checked first.
|
||||
const MAX_SEMANTIC_DEDUP_PAIRS = 50;
|
||||
if (allPairs.length > MAX_SEMANTIC_DEDUP_PAIRS) {
|
||||
allPairs.sort((a, b) => (b.similarity ?? 0) - (a.similarity ?? 0));
|
||||
const skipped = allPairs.length - MAX_SEMANTIC_DEDUP_PAIRS;
|
||||
allPairs.length = MAX_SEMANTIC_DEDUP_PAIRS;
|
||||
onProgress?.(
|
||||
"semanticDedup",
|
||||
`Capped at ${MAX_SEMANTIC_DEDUP_PAIRS} pairs (${skipped} lower-similarity pairs skipped)`,
|
||||
);
|
||||
logger.info(
|
||||
`memory-neo4j: [sleep] Phase 1b capped to ${MAX_SEMANTIC_DEDUP_PAIRS} pairs (${skipped} skipped)`,
|
||||
);
|
||||
}
|
||||
|
||||
// Process pairs in concurrent batches
|
||||
const invalidatedIds = new Set<string>();
|
||||
|
||||
|
||||
@@ -57,11 +57,10 @@ describe("passesAttentionGate", () => {
|
||||
});
|
||||
|
||||
it("should accept messages at exactly 30 characters with sufficient words", () => {
|
||||
// 30 chars, 5 words: "abcde abcde abcde abcde abcde" = 29 chars (5*5 + 4 spaces)
|
||||
// Need 30+ chars and 5+ words
|
||||
const text = "abcdef abcdef abcdef abcdef ab";
|
||||
expect(text.length).toBe(30);
|
||||
expect(text.split(/\s+/).length).toBeGreaterThanOrEqual(5);
|
||||
// Need 30+ chars and 8+ words
|
||||
const text = "ab cd ef gh ij kl mn op qr st u";
|
||||
expect(text.length).toBeGreaterThanOrEqual(30);
|
||||
expect(text.split(/\s+/).length).toBeGreaterThanOrEqual(8);
|
||||
expect(passesAttentionGate(text)).toBe(true);
|
||||
});
|
||||
|
||||
@@ -81,15 +80,19 @@ describe("passesAttentionGate", () => {
|
||||
// -----------------------------------------------------------------------
|
||||
|
||||
describe("word count", () => {
|
||||
it("should reject messages with fewer than 5 words", () => {
|
||||
// 4 words, but long enough in chars (> 30)
|
||||
it("should reject messages with fewer than 8 words", () => {
|
||||
// 7 words, but long enough in chars (> 30)
|
||||
expect(
|
||||
passesAttentionGate("thisislongword anotherlongword thirdlongword fourthlongword"),
|
||||
passesAttentionGate(
|
||||
"thisislongword anotherlongword thirdlongword fourthlongword fifth sixth seventh",
|
||||
),
|
||||
).toBe(false);
|
||||
});
|
||||
|
||||
it("should accept messages with exactly 5 words", () => {
|
||||
expect(passesAttentionGate("thisword thatword another fourth fifthword")).toBe(true);
|
||||
it("should accept messages with exactly 8 words", () => {
|
||||
expect(
|
||||
passesAttentionGate("thisword thatword another fourth fifthword sixth seventh eighth"),
|
||||
).toBe(true);
|
||||
});
|
||||
});
|
||||
|
||||
|
||||
@@ -1285,7 +1285,7 @@ async function runAutoCapture(
|
||||
const result = await captureMessage(
|
||||
text,
|
||||
"auto-capture",
|
||||
0.3,
|
||||
0.5,
|
||||
1.0,
|
||||
agentId,
|
||||
sessionKey,
|
||||
@@ -1312,7 +1312,7 @@ async function runAutoCapture(
|
||||
const result = await captureMessage(
|
||||
text,
|
||||
"auto-capture-assistant",
|
||||
0.7,
|
||||
0.8,
|
||||
0.75,
|
||||
agentId,
|
||||
sessionKey,
|
||||
|
||||
Reference in New Issue
Block a user