fix(ui): strip injected inbound metadata from user messages in history (#22142)

* fix(ui): strip injected inbound metadata from user messages in history Fixes #21106 Fixes #21109 Fixes #22116 OpenClaw prepends structured metadata blocks ("Conversation info", "Sender:", reply-context) to user messages before sending them to the LLM. These blocks are intentionally AI-context-only and must never reach the chat history that users see. Root cause: `buildInboundUserContextPrefix` in `inbound-meta.ts` prepends the blocks directly to the stored user message content string, so they are persisted verbatim and later shown in webchat, TUI, and every other rendering surface. Fix: • `src/auto-reply/reply/strip-inbound-meta.ts` — new utility with a 6-sentinel fast-path strip (zero-alloc on miss) + 9-test suite. • `src/tui/tui-session-actions.ts` — wraps `chatLog.addUser(...)` with `stripInboundMetadata()` so the TUI never stores the prefix. • `ui/src/ui/chat/message-normalizer.ts` — strips user-role text content items during normalisation so webchat renders clean messages. * fix(ui): strip inbound metadata for user messages in display path * test: fix discord component send test spread typing * fix: strip inbound metadata from mac chat history decode * fix: align Swift metadata stripping parser with TS implementation * fix: normalize line endings in inbound metadata stripper * chore: document Swift/TS metadata-sentinel ownership * chore: update changelog for inbound metadata strip fix * changelog: credit Mellowambience for 22142 --------- Co-authored-by: Vincent Koc <vincentkoc@ieee.org>
2026-05-07 06:11:37 +00:00 · 2026-02-20 20:35:13 -05:00
parent f555835b09
commit a4e7e952e1
11 changed files with 420 additions and 13 deletions
--- a/src/auto-reply/reply/strip-inbound-meta.test.ts
+++ b/src/auto-reply/reply/strip-inbound-meta.test.ts
@@ -0,0 +1,85 @@
+import { describe, it, expect } from "vitest";
+import { stripInboundMetadata } from "./strip-inbound-meta.js";
+
+// Fixtures: each one is a sentinel line followed by a fenced ```json payload.
+// The sentinel lines match three of the six strings exercised by the
+// "strips all six known sentinel types" test in this file.
+
+// "Conversation info" metadata block.
+const CONV_BLOCK = `Conversation info (untrusted metadata):
+\`\`\`json
+{
+  "message_id": "msg-abc",
+  "sender": "+1555000"
+}
+\`\`\``;
+
+// "Sender" metadata block; used after CONV_BLOCK to test chained blocks.
+const SENDER_BLOCK = `Sender (untrusted metadata):
+\`\`\`json
+{
+  "label": "Alice",
+  "name": "Alice"
+}
+\`\`\``;
+
+// "Replied message" context block.
+const REPLY_BLOCK = `Replied message (untrusted, for context):
+\`\`\`json
+{
+  "body": "What time is it?"
+}
+\`\`\``;
+
+// Behavioural suite for `stripInboundMetadata`: fast path, single and chained
+// blocks, every sentinel, and what happens to the whitespace around a block.
+describe("stripInboundMetadata", () => {
+  // `toBe` is an identity check for strings of equal content; it pins the
+  // documented "returns the input unchanged" contract of the fast path.
+  it("fast-path: returns same string when no sentinels present", () => {
+    const text = "Hello, how are you?";
+    expect(stripInboundMetadata(text)).toBe(text);
+  });
+
+  it("fast-path: returns empty string unchanged", () => {
+    expect(stripInboundMetadata("")).toBe("");
+  });
+
+  it("strips a single Conversation info block", () => {
+    const input = `${CONV_BLOCK}\n\nWhat is the weather today?`;
+    expect(stripInboundMetadata(input)).toBe("What is the weather today?");
+  });
+
+  it("strips multiple chained metadata blocks", () => {
+    const input = `${CONV_BLOCK}\n\n${SENDER_BLOCK}\n\nCan you help me?`;
+    expect(stripInboundMetadata(input)).toBe("Can you help me?");
+  });
+
+  it("strips Replied message block leaving user message intact", () => {
+    const input = `${REPLY_BLOCK}\n\nGot it, thanks!`;
+    expect(stripInboundMetadata(input)).toBe("Got it, thanks!");
+  });
+
+  it("strips all six known sentinel types", () => {
+    // Mirrors the sentinel list in strip-inbound-meta.ts; keep the two in sync.
+    const sentinels = [
+      "Conversation info (untrusted metadata):",
+      "Sender (untrusted metadata):",
+      "Thread starter (untrusted, for context):",
+      "Replied message (untrusted, for context):",
+      "Forwarded message context (untrusted metadata):",
+      "Chat history since last reply (untrusted, for context):",
+    ];
+    for (const sentinel of sentinels) {
+      const input = `${sentinel}\n\`\`\`json\n{"x": 1}\n\`\`\`\n\nUser message`;
+      expect(stripInboundMetadata(input)).toBe("User message");
+    }
+  });
+
+  it("handles metadata block with no user text after it", () => {
+    expect(stripInboundMetadata(CONV_BLOCK)).toBe("");
+  });
+
+  it("preserves message containing json fences that are not metadata", () => {
+    const text = `Here is my code:\n\`\`\`json\n{"key": "value"}\n\`\`\``;
+    expect(stripInboundMetadata(text)).toBe(text);
+  });
+
+  // The blank line separating the block from the message is NOT kept: the
+  // result starts directly with the user's text.
+  it("drops the blank separator line between metadata and user content", () => {
+    const input = `${CONV_BLOCK}\n\nActual message`;
+    expect(stripInboundMetadata(input)).toBe("Actual message");
+  });
+
+  // Only leading newlines are removed; indentation on the first content line
+  // belongs to the user and must survive.
+  it("preserves leading spaces in user content after stripping", () => {
+    const input = `${CONV_BLOCK}\n\n  Indented message`;
+    expect(stripInboundMetadata(input)).toBe("  Indented message");
+  });
+});
--- a/src/auto-reply/reply/strip-inbound-meta.ts
+++ b/src/auto-reply/reply/strip-inbound-meta.ts
@@ -0,0 +1,89 @@
+/**
+ * Strips OpenClaw-injected inbound metadata blocks from a user-role message
+ * text before it is displayed in any UI surface (TUI, webchat, macOS app).
+ *
+ * Background: `buildInboundUserContextPrefix` in `inbound-meta.ts` prepends
+ * structured metadata blocks (Conversation info, Sender info, reply context,
+ * etc.) directly to the stored user message content so the LLM can access
+ * them. These blocks are AI-facing only and must never surface in user-visible
+ * chat history.
+ */
+
+/**
+ * Sentinel strings that identify the start of an injected metadata block.
+ * Must stay in sync with `buildInboundUserContextPrefix` in `inbound-meta.ts`.
+ *
+ * The stripper matches these with `line.startsWith(...)`, so each entry is
+ * the literal beginning of a sentinel line, including the trailing colon.
+ */
+const INBOUND_META_SENTINELS = [
+  "Conversation info (untrusted metadata):",
+  "Sender (untrusted metadata):",
+  "Thread starter (untrusted, for context):",
+  "Replied message (untrusted, for context):",
+  "Forwarded message context (untrusted metadata):",
+  "Chat history since last reply (untrusted, for context):",
+] as const;
+
+// Pre-compiled fast-path regex — avoids line-by-line parse when no blocks present.
+// Each sentinel is regex-escaped and the results are OR-ed together. The
+// pattern is unanchored, so it also matches a sentinel appearing mid-line; it
+// is only a cheap pre-filter, not proof that a metadata block exists.
+const SENTINEL_FAST_RE = new RegExp(
+  INBOUND_META_SENTINELS.map((s) => s.replace(/[.*+?^${}()|[\]\\]/g, "\\$&")).join("|"),
+);
+
+/**
+ * Remove all injected inbound metadata blocks from `text`.
+ *
+ * Each block has the shape:
+ *
+ * ```
+ * <sentinel-line>
+ * ```json
+ * { … }
+ * ```
+ * ```
+ *
+ * A line only opens a block when it starts with one of
+ * `INBOUND_META_SENTINELS` AND the next non-blank line is a "```json" fence.
+ * A sentinel-looking line without that fence is ordinary user text (for
+ * example a user quoting the phrase) and is kept verbatim.
+ *
+ * Once a block is opened, everything up to and including the closing "```"
+ * line is removed. If the closing fence is missing, the rest of the text is
+ * removed, so a truncated block never leaks into the UI.
+ *
+ * After stripping, leading line breaks (LF or CRLF) left behind by the
+ * separator between the metadata and the message are removed; other leading
+ * whitespace, such as indentation, is preserved.
+ *
+ * @param text - Stored user-message text, possibly carrying metadata blocks.
+ * @returns `text` itself when nothing was stripped (including when `text` is
+ *   empty or contains no sentinel), otherwise the text with every metadata
+ *   block removed.
+ */
+export function stripInboundMetadata(text: string): string {
+  // Fast path: the unanchored regex is a cheap pre-filter for the line scan.
+  if (!text || !SENTINEL_FAST_RE.test(text)) {
+    return text;
+  }
+
+  const lines = text.split("\n");
+  // `trim()` also removes a trailing "\r", so CRLF input compares correctly.
+  const trimmedAt = (index: number): string => (lines[index] ?? "").trim();
+  const kept: string[] = [];
+  let strippedAny = false;
+  let i = 0;
+
+  while (i < lines.length) {
+    const line = lines[i] ?? "";
+
+    if (INBOUND_META_SENTINELS.some((s) => line.startsWith(s))) {
+      // Blank lines between the sentinel and its fence are tolerated.
+      let fence = i + 1;
+      while (fence < lines.length && trimmedAt(fence) === "") {
+        fence++;
+      }
+
+      if (fence < lines.length && trimmedAt(fence) === "```json") {
+        // Skip the JSON payload up to the closing fence (or end of text).
+        let close = fence + 1;
+        while (close < lines.length && trimmedAt(close) !== "```") {
+          close++;
+        }
+        strippedAny = true;
+        i = close + 1;
+        continue;
+      }
+      // No fence follows: not a metadata block, fall through and keep the line.
+    }
+
+    kept.push(line);
+    i++;
+  }
+
+  // The regex matched but no block was found (e.g. a mid-line mention):
+  // hand back the input untouched, leading newlines included.
+  if (!strippedAny) {
+    return text;
+  }
+
+  // Drop the separator line(s) left at the top; handles both "\n" and "\r\n".
+  return kept.join("\n").replace(/^(?:\r?\n)+/, "");
+}