fix(tui): strip inbound metadata blocks from user text

This commit is contained in:
Vincent Koc
2026-02-20 18:04:31 -08:00
parent 02ac5b59d1
commit 135e6019f7
5 changed files with 121 additions and 4 deletions

View File

@@ -39,4 +39,35 @@ describe("stripEnvelopeFromMessage", () => {
const result = stripEnvelopeFromMessage(input) as { content?: string };
expect(result.content).toBe("note\n[message_id: 123]");
});
test("removes inbound un-bracketed conversation info blocks from user messages", () => {
  // One metadata block (header line + fenced JSON) sits in front of the real text.
  const rawContent =
    'Conversation info (untrusted metadata):\n```json\n{\n "message_id": "123"\n}\n```\n\nHello there';
  const sanitized = stripEnvelopeFromMessage({ role: "user", content: rawContent }) as {
    content?: string;
  };
  // Only the text after the block should survive.
  expect(sanitized.content).toBe("Hello there");
});
test("removes all inbound metadata blocks before user text", () => {
  // Two different metadata blocks are stacked ahead of the real text.
  const rawContent =
    "Thread starter (untrusted, for context):\n```json\n{\"seed\": 1}\n```\n\nSender (untrusted metadata):\n```json\n{\"name\": \"alice\"}\n```\n\nActual user message";
  const sanitized = stripEnvelopeFromMessage({ role: "user", content: rawContent }) as {
    content?: string;
  };
  // Both blocks must be removed, leaving just the message body.
  expect(sanitized.content).toBe("Actual user message");
});
test("does not strip metadata-like blocks that are not a prefix", () => {
  // The metadata-looking block comes after real text, so it is part of the message.
  const rawContent =
    "Actual text\nConversation info (untrusted metadata):\n```json\n{\"message_id\": \"123\"}\n```\n\nFollow-up";
  const expectedContent =
    "Actual text\nConversation info (untrusted metadata):\n```json\n{\"message_id\": \"123\"}\n```\n\nFollow-up";
  const sanitized = stripEnvelopeFromMessage({ role: "user", content: rawContent }) as {
    content?: string;
  };
  // The content is expected to come back exactly as it went in.
  expect(sanitized.content).toBe(expectedContent);
});
});

View File

@@ -1,4 +1,8 @@
import { stripEnvelope, stripMessageIdHints } from "../shared/chat-envelope.js";
import {
stripEnvelope,
stripInboundMetadataBlocks,
stripMessageIdHints,
} from "../shared/chat-envelope.js";
export { stripEnvelope };
@@ -12,7 +16,7 @@ function stripEnvelopeFromContent(content: unknown[]): { content: unknown[]; cha
if (entry.type !== "text" || typeof entry.text !== "string") {
return item;
}
const stripped = stripMessageIdHints(stripEnvelope(entry.text));
const stripped = stripMessageIdHints(stripEnvelope(stripInboundMetadataBlocks(entry.text)));
if (stripped === entry.text) {
return item;
}
@@ -39,7 +43,9 @@ export function stripEnvelopeFromMessage(message: unknown): unknown {
const next: Record<string, unknown> = { ...entry };
if (typeof entry.content === "string") {
const stripped = stripMessageIdHints(stripEnvelope(entry.content));
const stripped = stripMessageIdHints(
stripEnvelope(stripInboundMetadataBlocks(entry.content)),
);
if (stripped !== entry.content) {
next.content = stripped;
changed = true;
@@ -51,7 +57,9 @@ export function stripEnvelopeFromMessage(message: unknown): unknown {
changed = true;
}
} else if (typeof entry.text === "string") {
const stripped = stripMessageIdHints(stripEnvelope(entry.text));
const stripped = stripMessageIdHints(
stripEnvelope(stripInboundMetadataBlocks(entry.text)),
);
if (stripped !== entry.text) {
next.text = stripped;
changed = true;

View File

@@ -16,6 +16,18 @@ const ENVELOPE_CHANNELS = [
];
/** Matches a line that holds nothing but a `[message_id: ...]` hint (case-insensitive). */
const MESSAGE_ID_LINE = /^\s*\[message_id:\s*[^\]]+\]\s*$/i;

/** Header lines that introduce an inbound metadata block. */
const INBOUND_METADATA_HEADERS = [
  "Conversation info (untrusted metadata):",
  "Sender (untrusted metadata):",
  "Thread starter (untrusted, for context):",
  "Replied message (untrusted, for context):",
  "Forwarded message context (untrusted metadata):",
  "Chat history since last reply (untrusted, for context):",
];

/** Characters that must be backslash-escaped to appear literally in a regex. */
const REGEX_ESCAPE_RE = /[.*+?^${}()|[\]\\]/g;

/**
 * Matches ONE metadata block at the very start of a string: optional leading
 * whitespace, a known header line, a fenced `json` block, and any line breaks
 * that follow the closing fence.
 */
const INBOUND_METADATA_PREFIX_RE = new RegExp(
  [
    // Anchor at the start; tolerate leading whitespace.
    "^\\s*",
    // Any one of the known headers, taken literally.
    "(?:",
    INBOUND_METADATA_HEADERS.map((header) => header.replace(REGEX_ESCAPE_RE, "\\$&")).join("|"),
    ")",
    // Opening fence on the line right after the header.
    "\\r?\\n```json\\r?\\n",
    // Block body, as short as possible so only the first closing fence ends it.
    "[\\s\\S]*?",
    // Closing fence on its own line.
    "\\r?\\n```",
    // Swallow every line break directly after the block.
    "(?:\\r?\\n)*",
  ].join(""),
);
function looksLikeEnvelopeHeader(header: string): boolean {
if (/\d{4}-\d{2}-\d{2}T\d{2}:\d{2}Z\b/.test(header)) {
@@ -47,3 +59,15 @@ export function stripMessageIdHints(text: string): string {
const filtered = lines.filter((line) => !MESSAGE_ID_LINE.test(line));
return filtered.length === lines.length ? text : filtered.join("\n");
}
/**
 * Removes every inbound metadata block that prefixes `text`.
 *
 * A block is whatever `INBOUND_METADATA_PREFIX_RE` matches: a known header
 * line followed by a fenced `json` block. Blocks are removed only while they
 * form a contiguous prefix; a metadata-like block that appears after other
 * text is left in place.
 *
 * @param text - Message text that may start with one or more metadata blocks.
 * @returns `text` itself, whitespace included, when it has no leading
 *   metadata block; otherwise the text after the last leading block, trimmed.
 */
export function stripInboundMetadataBlocks(text: string): string {
  let remaining = text;
  let strippedAny = false;
  for (;;) {
    const match = INBOUND_METADATA_PREFIX_RE.exec(remaining);
    if (!match) {
      break;
    }
    strippedAny = true;
    // The pattern is anchored with `^`, so the match starts at index 0 and its
    // length is exactly the prefix to drop. It already consumes the line
    // breaks after the closing fence, so no extra newline cleanup is needed.
    remaining = remaining.slice(match[0].length);
  }
  // Only normalise whitespace when something was actually removed; text with
  // no metadata prefix must come back unchanged so callers that compare the
  // result against the input do not see a spurious modification.
  return strippedAny ? remaining.trim() : text;
}

View File

@@ -95,6 +95,56 @@ describe("extractTextFromMessage", () => {
expect(text).toBe("[binary data omitted]");
});
it("strips leading inbound metadata blocks for user messages", () => {
  // Two consecutive metadata blocks precede the real message body.
  const rawContent = `Conversation info (untrusted metadata):
\`\`\`json
{
"message_id": "abc123"
}
\`\`\`
Sender (untrusted metadata):
\`\`\`json
{
"label": "Someone"
}
\`\`\`
Actual user message`;
  const rendered = extractTextFromMessage({ role: "user", content: rawContent });
  // Only the body after the last block should be displayed.
  expect(rendered).toBe("Actual user message");
});
it("keeps metadata-like blocks for non-user messages", () => {
  // Same shape as an inbound metadata block, but authored by the assistant.
  const rawContent = `Conversation info (untrusted metadata):
\`\`\`json
{"message_id":"abc123"}
\`\`\`
Assistant body`;
  const rendered = extractTextFromMessage({ role: "assistant", content: rawContent });
  // Both the header and the body must still be present in the output.
  expect(rendered).toContain("Conversation info (untrusted metadata):");
  expect(rendered).toContain("Assistant body");
});
it("does not strip metadata-like blocks that are not a leading prefix", () => {
  // The metadata-looking block follows real text, so it belongs to the message.
  const rawContent =
    "Hello world\nConversation info (untrusted metadata):\n```json\n{\"message_id\":\"123\"}\n```\n\nFollow-up";
  const expectedText =
    "Hello world\nConversation info (untrusted metadata):\n```json\n{\"message_id\":\"123\"}\n```\n\nFollow-up";
  const rendered = extractTextFromMessage({ role: "user", content: rawContent });
  // The text is expected to come back exactly as it went in.
  expect(rendered).toBe(expectedText);
});
});
describe("extractThinkingFromMessage", () => {

View File

@@ -1,4 +1,5 @@
import { formatRawAssistantErrorForUi } from "../agents/pi-embedded-helpers.js";
import { stripInboundMetadataBlocks } from "../shared/chat-envelope.js";
import { stripAnsi } from "../terminal/ansi.js";
import { formatTokenCount } from "../utils/usage-format.js";
@@ -273,6 +274,9 @@ export function extractTextFromMessage(
const record = message as Record<string, unknown>;
const text = extractTextBlocks(record.content, opts);
if (text) {
if (record.role === "user") {
return stripInboundMetadataBlocks(text);
}
return text;
}