memory-neo4j: strengthen auto-capture filtering and add Slack metadata stripping

- Raise MIN_CAPTURE_CHARS from 10 to 30 to reject trivially short messages
- Add noise patterns for conversational filler (haha, lol, hmm, etc.)
- Add noise pattern to reject /new and /reset session prompts
- Raise the importance threshold for assistant auto-captures to 0.7 (messages rated below 0.7 are skipped)
- Add Slack protocol prefix/suffix stripping in stripMessageWrappers()

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
This commit is contained in:
Tarun Sukhani
2026-02-07 22:50:55 +08:00
parent 4a3d424890
commit 27cb766209
2 changed files with 17 additions and 4 deletions

View File

@@ -1008,8 +1008,11 @@ export function stripMessageWrappers(text: string): string {
s = s.replace(/^(?:System:\s*\[[^\]]*\][^\n]*\n?)+/gi, "");
// Telegram wrapper — may now be at start after previous strips
s = s.replace(/^\s*\[Telegram\s[^\]]+\]\s*/i, "");
// "[message_id: NNN]" suffix
// "[message_id: NNN]" suffix (Telegram)
s = s.replace(/\n?\[message_id:\s*\d+\]\s*$/i, "");
// Slack wrapper — "[Slack <workspace> #channel @user] MESSAGE [slack message id: ...]"
s = s.replace(/^\s*\[Slack\s[^\]]+\]\s*/i, "");
s = s.replace(/\n?\[slack message id:\s*[^\]]*\]\s*$/i, "");
return s.trim();
}

View File

@@ -1151,6 +1151,13 @@ const memoryNeo4jPlugin = {
for (const text of retainedAssistant) {
try {
const importance = await rateImportance(text, extractionConfig);
// Only store assistant messages that are genuinely important
if (importance < 0.7) {
continue;
}
const vector = await embeddings.embed(text);
const existing = await db.findSimilar(vector, 0.95, 1);
@@ -1158,8 +1165,6 @@ const memoryNeo4jPlugin = {
continue;
}
const importance = await rateImportance(text, extractionConfig);
await db.storeMemory({
id: randomUUID(),
text,
@@ -1239,6 +1244,8 @@ const NOISE_PATTERNS = [
/^(ok[,.]?\s+)?(i('ll|'m|'d|'ve)?\s+)?(just\s+)?(need|want|got|have|let|let's|let me|give me|send|do|did|try|check|see|look at|test|take|get|go|use)\s+(it|that|this|those|these|them|some|one|the|a|an|me|him|her|us)\s*(out|up|now|then|too|again|later|first|here|there|please)?\s*[.!?]*$/i,
// Short acknowledgments with trailing context: "ok, ..." / "yes, ..." when total is brief
/^(ok|okay|yes|yeah|yep|sure|no|nope|right|alright|fine|cool|nice|great|perfect)[,.]?\s+.{0,20}$/i,
// Conversational filler / noise phrases (standalone, with optional punctuation)
/^(hmm+|huh|haha|ha|lol|lmao|rofl|nah|meh|idk|brb|ttyl|omg|wow|whoa|welp|oops|ooh|aah|ugh|bleh|pfft|smh|ikr|tbh|imo|fwiw|np|nvm|nm|wut|wat|wha|heh|tsk|sigh|yay|woo+|boo|dang|darn|geez|gosh|sheesh|oof)\s*[.!?]*$/i,
// Near-empty: blank, or a single token of at most 3 non-whitespace characters
/^\S{0,3}$/,
// Pure emoji
@@ -1246,6 +1253,9 @@ const NOISE_PATTERNS = [
// System/XML markup
/^<[a-z-]+>[\s\S]*<\/[a-z-]+>$/i,
// --- Session reset prompts (from /new and /reset commands) ---
/^A new session was started via/i,
// --- System infrastructure messages (never user-generated) ---
// Heartbeat prompts
/Read HEARTBEAT\.md if it exists/i,
@@ -1265,7 +1275,7 @@ const NOISE_PATTERNS = [
const MAX_CAPTURE_CHARS = 2000;
/** Minimum message length — too short to be meaningful. */
const MIN_CAPTURE_CHARS = 10;
const MIN_CAPTURE_CHARS = 30;
/** Minimum word count — short contextual phrases lack standalone meaning. */
const MIN_WORD_COUNT = 5;