Files
openclaw/extensions/memory-neo4j/message-utils.ts
Tarun Sukhani e9b9da5a1f memory-neo4j: add userPinned flag, remove demotion, add benchmarking, audit fixes
- Add userPinned boolean on Memory nodes: user-stored core memories are
  immune from importance recalculation, decay, and pruning. Only removable
  via memory_forget. Importance locked at 1.0.
- Add listCoreForInjection(): always injects ALL userPinned core memories
  plus top N non-pinned core memories by importance (no silent drop-off
  for user-pinned memories regardless of maxEntries cap).
- Remove core demotion entirely: promotion is now one-way. Bad core
  memories are handled manually via memory_forget.
- Add [bench] performance timing to auto-recall, auto-capture, core
  memory injection, core refresh, and hybridSearch.
- Audit fixes: remove dead entity/tag methods, dead test blocks, orphaned
  demoteFromCore docstring, unnecessary .slice() in graphSearch.
- Refactor attention gate into shared checks for user/assistant gates.
- Consolidate LLM client, message utils, and config helpers.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
2026-02-16 17:56:39 +08:00

136 lines
5.0 KiB
TypeScript

/**
* Message extraction utilities for the memory pipeline.
*
* Extracts and cleans user/assistant messages from the raw event.messages
* array, stripping channel wrappers, injected context, tool output, and
* other noise so downstream consumers (attention gate, memory store) see
* only the substantive text.
*/
// ============================================================================
// Core Extraction
// ============================================================================
/**
* Extract text blocks from messages with a given role, apply a strip function,
* and filter out short results. Handles both string content and content block arrays.
*/
function extractMessagesByRole(
messages: unknown[],
role: string,
stripFn: (text: string) => string,
): string[] {
const texts: string[] = [];
for (const msg of messages) {
if (!msg || typeof msg !== "object") {
continue;
}
const msgObj = msg as Record<string, unknown>;
if (msgObj.role !== role) {
continue;
}
const content = msgObj.content;
if (typeof content === "string") {
texts.push(content);
continue;
}
if (Array.isArray(content)) {
for (const block of content) {
if (
block &&
typeof block === "object" &&
"type" in block &&
(block as Record<string, unknown>).type === "text" &&
"text" in block &&
typeof (block as Record<string, unknown>).text === "string"
) {
texts.push((block as Record<string, unknown>).text as string);
}
}
}
}
return texts.map(stripFn).filter((t) => t.length >= 10);
}
// ============================================================================
// User Message Extraction
// ============================================================================
/**
* Extract user message texts from the event.messages array.
*/
export function extractUserMessages(messages: unknown[]): string[] {
return extractMessagesByRole(messages, "user", stripMessageWrappers);
}
/**
* Strip injected context, channel metadata wrappers, and system prefixes
* so the attention gate sees only the raw user text.
* Exported for use by the cleanup command.
*/
export function stripMessageWrappers(text: string): string {
let s = text;
// Injected context from memory system
s = s.replace(/<relevant-memories>[\s\S]*?<\/relevant-memories>\s*/g, "");
s = s.replace(/<core-memory-refresh>[\s\S]*?<\/core-memory-refresh>\s*/g, "");
s = s.replace(/<system>[\s\S]*?<\/system>\s*/g, "");
// File attachments (PDFs, images, etc. forwarded inline by channels)
s = s.replace(/<file\b[^>]*>[\s\S]*?<\/file>\s*/g, "");
// Media attachment preamble (appears before Telegram wrapper)
s = s.replace(/^\[media attached:[^\]]*\]\s*(?:To send an image[^\n]*\n?)*/i, "");
// System exec output blocks (may appear before Telegram wrapper)
s = s.replace(/^(?:System:\s*\[[^\]]*\][^\n]*\n?)+/gi, "");
// Voice chat timestamp prefix: [Tue 2026-02-10 19:41 GMT+8]
s = s.replace(
/^\[(?:Mon|Tue|Wed|Thu|Fri|Sat|Sun)\s+\d{4}-\d{2}-\d{2}\s+\d{1,2}:\d{2}\s+GMT[+-]\d+\]\s*/i,
"",
);
// Conversation info metadata block (gateway routing context with JSON code fence)
s = s.replace(/Conversation info\s*\(untrusted metadata\):\s*```[\s\S]*?```\s*/g, "");
// Queued message batch header and separators
s = s.replace(/^\[Queued messages while agent was busy\]\s*/i, "");
s = s.replace(/---\s*Queued #\d+\s*/g, "");
// Telegram wrapper — may now be at start after previous strips
s = s.replace(/^\s*\[Telegram\s[^\]]+\]\s*/i, "");
// "[message_id: ...]" suffix (Telegram and other channel IDs)
s = s.replace(/\n?\[message_id:\s*[^\]]+\]\s*$/i, "");
// Slack wrapper — "[Slack <workspace> #channel @user] MESSAGE [slack message id: ...]"
s = s.replace(/^\s*\[Slack\s[^\]]+\]\s*/i, "");
s = s.replace(/\n?\[slack message id:\s*[^\]]*\]\s*$/i, "");
return s.trim();
}
// ============================================================================
// Assistant Message Extraction
// ============================================================================
/**
* Strip tool-use, thinking, and code-output blocks from assistant messages
* so the attention gate sees only the substantive assistant text.
*/
export function stripAssistantWrappers(text: string): string {
let s = text;
// Tool-use / tool-result / function_call blocks
s = s.replace(/<tool_use>[\s\S]*?<\/tool_use>\s*/g, "");
s = s.replace(/<tool_result>[\s\S]*?<\/tool_result>\s*/g, "");
s = s.replace(/<function_call>[\s\S]*?<\/function_call>\s*/g, "");
// Thinking tags
s = s.replace(/<thinking>[\s\S]*?<\/thinking>\s*/g, "");
s = s.replace(/<antThinking>[\s\S]*?<\/antThinking>\s*/g, "");
// Code execution output
s = s.replace(/<code_output>[\s\S]*?<\/code_output>\s*/g, "");
return s.trim();
}
/**
* Extract assistant message texts from the event.messages array.
*/
export function extractAssistantMessages(messages: unknown[]): string[] {
return extractMessagesByRole(messages, "assistant", stripAssistantWrappers);
}