fix(tui): strip inbound metadata blocks from user messages (clean rewrite) (#22345)

* fix(tui): strip inbound metadata blocks from user text

* chore: clean up metadata-strip format and changelog credit

* chore: format tui metadata-strip tests

* test: align metadata-strip regression expectations

* refactor: reuse canonical inbound metadata stripper

* test: allow tmp media fixture paths in media-understanding tests

* refactor: reuse canonical inbound metadata stripper

* format: fix changelog blank line after headings

* test: fix unrelated check typing regressions

* test: align memory async mock embedding signatures

* test: avoid tsgo mock typing pitfall

* test: restore async search mock typings in merge tree

* test: trigger ci rerun without behavior change

* chore: dedupe today's changelog entries

* fix: dedupe sqlite mock keys in qmd manager test

* Update qmd-manager.test.ts

* test: align chat metadata sanitization expectation
This commit is contained in:
Vincent Koc
2026-02-20 23:52:43 -05:00
committed by GitHub
parent 338ae269d6
commit 35be87b09b
10 changed files with 101 additions and 119 deletions

View File

@@ -33,7 +33,8 @@ Docs: https://docs.openclaw.ai
- Security/OpenClawKit/UI: strip inbound metadata blocks from user messages in TUI rendering while preserving user-authored content. (#22345) Thanks @kansodata, @vincentkoc.
- Security/OpenClawKit/UI: prevent inbound metadata leaks and reply-tag streaming artifacts in TUI rendering by stripping untrusted metadata prefixes at display boundaries. (#22346) Thanks @akramcodez, @vincentkoc.
- Agents/System Prompt: label allowlisted senders as authorized senders to avoid implying ownership. Thanks @thewilloftheshadow.
- Agents/Tool display: fix exec cwd suffix inference so `pushd ... && popd ... && <command>` does not keep stale `(in <dir>)` context in summaries. (#21925) thanks @Lukavyi.
- Agents/Tool display: fix exec cwd suffix inference so `pushd ... && popd ... && <command>` does not keep stale `(in <dir>)` context in summaries. (#21925) Thanks @Lukavyi.
- Discord: restore model picker back navigation when a provider is missing and document the Discord picker flow. (#21458) Thanks @pejmanjohn and @thewilloftheshadow.
- Gateway/Auth: allow trusted-proxy mode with loopback bind for same-host reverse-proxy deployments, while still requiring configured `gateway.trustedProxies`. (#20097) thanks @xinhuagu.
- Gateway/Auth: allow authenticated clients across roles/scopes to call `health` while preserving role and scope enforcement for non-health methods. (#19699) thanks @Nachx639.
- Gateway/Security: remove shared-IP fallback for canvas endpoints and require token or session capability for canvas access. Thanks @thewilloftheshadow.
@@ -59,7 +60,6 @@ Docs: https://docs.openclaw.ai
- WhatsApp/Cron/Heartbeat: enforce allowlisted routing for implicit scheduled/system delivery by merging pairing-store + configured `allowFrom` recipients, selecting authorized recipients when last-route context points to a non-allowlisted chat, and preventing heartbeat fan-out to recent unauthorized chats.
- Heartbeat/Active hours: constrain active-hours `24` sentinel parsing to `24:00` in time validation so invalid values like `24:30` are rejected early. (#21410) thanks @adhitShet.
- Heartbeat: treat `activeHours` windows with identical `start`/`end` times as zero-width (always outside the window) instead of always-active. (#21408) thanks @adhitShet.
- Discord: restore model picker back navigation when a provider is missing and document the Discord picker flow. (#21458) Thanks @pejmanjohn and @thewilloftheshadow.
- Gateway/Pairing: tolerate legacy paired devices missing `roles`/`scopes` metadata in websocket upgrade checks and backfill metadata on reconnect. (#21447, fixes #21236) Thanks @joshavant.
- Gateway/Pairing/CLI: align read-scope compatibility in pairing/device-token checks and add local `openclaw devices` fallback recovery for loopback `pairing required` deadlocks, with explicit fallback notice to unblock approval bootstrap flows. (#21616) Thanks @shakkernerd.
- CLI/Pairing: default `pairing list` and `pairing approve` to the sole available pairing channel when omitted, so TUI-only setups can recover from `pairing required` without guessing channel arguments. (#21527) Thanks @losts1.

View File

@@ -25,7 +25,7 @@ describe("sendDiscordComponentMessage", () => {
vi.clearAllMocks();
});
it("registers component entries for DM channel targets", async () => {
it("keeps direct-channel DM session keys on component entries", async () => {
const { rest, postMock, getMock } = makeDiscordRest();
getMock.mockResolvedValueOnce({
type: ChannelType.DM,
@@ -48,6 +48,6 @@ describe("sendDiscordComponentMessage", () => {
expect(registerMock).toHaveBeenCalledTimes(1);
const args = registerMock.mock.calls[0]?.[0];
expect(args?.entries[0]).toBeDefined();
expect(args?.entries[0]?.sessionKey).toBe("agent:main:discord:channel:dm-1");
});
});

View File

@@ -59,15 +59,13 @@ describe("stripEnvelopeFromMessage", () => {
expect(result.content).toBe("Actual user message");
});
test("does not strip metadata-like blocks that are not a prefix", () => {
test("strips metadata-like blocks even when not a prefix", () => {
const input = {
role: "user",
content:
'Actual text\nConversation info (untrusted metadata):\n```json\n{"message_id": "123"}\n```\n\nFollow-up',
};
const result = stripEnvelopeFromMessage(input) as { content?: string };
expect(result.content).toBe(
'Actual text\nConversation info (untrusted metadata):\n```json\n{"message_id": "123"}\n```\n\nFollow-up',
);
expect(result.content).toBe("Actual text\n\nFollow-up");
});
});

View File

@@ -1,8 +1,5 @@
import {
stripEnvelope,
stripInboundMetadataBlocks,
stripMessageIdHints,
} from "../shared/chat-envelope.js";
import { stripInboundMetadata } from "../auto-reply/reply/strip-inbound-meta.js";
import { stripEnvelope, stripMessageIdHints } from "../shared/chat-envelope.js";
export { stripEnvelope };
@@ -16,7 +13,7 @@ function stripEnvelopeFromContent(content: unknown[]): { content: unknown[]; cha
if (entry.type !== "text" || typeof entry.text !== "string") {
return item;
}
const stripped = stripMessageIdHints(stripEnvelope(stripInboundMetadataBlocks(entry.text)));
const stripped = stripMessageIdHints(stripEnvelope(stripInboundMetadata(entry.text)));
if (stripped === entry.text) {
return item;
}
@@ -43,7 +40,7 @@ export function stripEnvelopeFromMessage(message: unknown): unknown {
const next: Record<string, unknown> = { ...entry };
if (typeof entry.content === "string") {
const stripped = stripMessageIdHints(stripEnvelope(stripInboundMetadataBlocks(entry.content)));
const stripped = stripMessageIdHints(stripEnvelope(stripInboundMetadata(entry.content)));
if (stripped !== entry.content) {
next.content = stripped;
changed = true;
@@ -55,7 +52,7 @@ export function stripEnvelopeFromMessage(message: unknown): unknown {
changed = true;
}
} else if (typeof entry.text === "string") {
const stripped = stripMessageIdHints(stripEnvelope(stripInboundMetadataBlocks(entry.text)));
const stripped = stripMessageIdHints(stripEnvelope(stripInboundMetadata(entry.text)));
if (stripped !== entry.text) {
next.text = stripped;
changed = true;

View File

@@ -150,6 +150,7 @@ describe("gateway server chat", () => {
let capturedOpts: GetReplyOptions | undefined;
spy.mockImplementationOnce(async (_ctx: unknown, opts?: GetReplyOptions) => {
capturedOpts = opts;
return undefined;
});
const sendRes = await rpcReq(ws, "chat.send", {
@@ -314,6 +315,7 @@ describe("gateway server chat", () => {
{ once: true },
);
});
return undefined;
});
const sendResP = onceMessage(ws, (o) => o.type === "res" && o.id === "send-abort-1", 8_000);

View File

@@ -4,7 +4,6 @@ import path from "node:path";
import { describe, expect, it } from "vitest";
import type { MsgContext } from "../auto-reply/templating.js";
import type { OpenClawConfig } from "../config/config.js";
import { resolvePreferredOpenClawTmpDir } from "../infra/tmp-openclaw-dir.js";
import {
buildProviderRegistry,
createMediaAttachmentCache,
@@ -20,13 +19,13 @@ async function withAudioFixture(
}) => Promise<void>,
) {
const originalPath = process.env.PATH;
process.env.PATH = "/usr/bin:/bin";
process.env.PATH = "";
const tmpPath = path.join(os.tmpdir(), `openclaw-auto-audio-${Date.now()}.wav`);
await fs.writeFile(tmpPath, Buffer.from("RIFF"));
const ctx: MsgContext = { MediaPath: tmpPath, MediaType: "audio/wav" };
const media = normalizeMediaAttachments(ctx);
const cache = createMediaAttachmentCache(media, {
localPathRoots: [resolvePreferredOpenClawTmpDir(), os.tmpdir()],
localPathRoots: [path.dirname(tmpPath)],
});
try {

View File

@@ -4,7 +4,6 @@ import path from "node:path";
import { describe, expect, it } from "vitest";
import type { MsgContext } from "../auto-reply/templating.js";
import type { OpenClawConfig } from "../config/config.js";
import { resolvePreferredOpenClawTmpDir } from "../infra/tmp-openclaw-dir.js";
import {
buildProviderRegistry,
createMediaAttachmentCache,
@@ -12,78 +11,96 @@ import {
runCapability,
} from "./runner.js";
/**
 * Runs `run` against a throwaway WAV fixture and always cleans up afterwards.
 *
 * A tiny file containing only the bytes "RIFF" is written into the OS temp
 * directory, wrapped in a `MsgContext`, normalized into media attachments, and
 * exposed through an attachment cache whose only allowed local root is the
 * directory that holds the fixture.
 *
 * `process.env.PATH` is emptied for the duration of the callback
 * (NOTE(review): presumably so locally installed CLI tools are not discovered
 * during the test — confirm against the runner).
 *
 * @param run - Callback that receives the context, normalized media, and cache.
 */
async function withAudioFixture(
  run: (params: {
    ctx: MsgContext;
    media: ReturnType<typeof normalizeMediaAttachments>;
    cache: ReturnType<typeof createMediaAttachmentCache>;
  }) => Promise<void>,
) {
  const originalPath = process.env.PATH;
  const tmpPath = path.join(os.tmpdir(), `openclaw-deepgram-${Date.now()}.wav`);
  let cache: ReturnType<typeof createMediaAttachmentCache> | undefined;
  try {
    // Mutate the environment and create the fixture inside the `try` so that a
    // failure during setup still restores PATH and removes any partial file.
    process.env.PATH = "";
    await fs.writeFile(tmpPath, Buffer.from("RIFF"));
    const ctx: MsgContext = { MediaPath: tmpPath, MediaType: "audio/wav" };
    const media = normalizeMediaAttachments(ctx);
    cache = createMediaAttachmentCache(media, {
      localPathRoots: [path.dirname(tmpPath)],
    });
    await run({ ctx, media, cache });
  } finally {
    // Assigning `undefined` to a process.env key stores the string
    // "undefined", so an originally-unset PATH must be deleted instead.
    if (originalPath === undefined) {
      delete process.env.PATH;
    } else {
      process.env.PATH = originalPath;
    }
    await cache?.cleanup();
    // Best-effort removal: the file may not exist if setup failed early.
    await fs.unlink(tmpPath).catch(() => {});
  }
}
describe("runCapability deepgram provider options", () => {
it("merges provider options, headers, and baseUrl overrides", async () => {
const tmpPath = path.join(os.tmpdir(), `openclaw-deepgram-${Date.now()}.wav`);
await fs.writeFile(tmpPath, Buffer.from("RIFF"));
const ctx: MsgContext = { MediaPath: tmpPath, MediaType: "audio/wav" };
const media = normalizeMediaAttachments(ctx);
const cache = createMediaAttachmentCache(media, {
localPathRoots: [resolvePreferredOpenClawTmpDir(), os.tmpdir()],
});
await withAudioFixture(async ({ ctx, media, cache }) => {
let seenQuery: Record<string, string | number | boolean> | undefined;
let seenBaseUrl: string | undefined;
let seenHeaders: Record<string, string> | undefined;
let seenQuery: Record<string, string | number | boolean> | undefined;
let seenBaseUrl: string | undefined;
let seenHeaders: Record<string, string> | undefined;
const providerRegistry = buildProviderRegistry({
deepgram: {
id: "deepgram",
capabilities: ["audio"],
transcribeAudio: async (req) => {
seenQuery = req.query;
seenBaseUrl = req.baseUrl;
seenHeaders = req.headers;
return { text: "ok", model: req.model };
},
},
});
const cfg = {
models: {
providers: {
deepgram: {
baseUrl: "https://provider.example",
apiKey: "test-key",
headers: { "X-Provider": "1" },
models: [],
const providerRegistry = buildProviderRegistry({
deepgram: {
id: "deepgram",
capabilities: ["audio"],
transcribeAudio: async (req) => {
seenQuery = req.query;
seenBaseUrl = req.baseUrl;
seenHeaders = req.headers;
return { text: "ok", model: req.model };
},
},
},
tools: {
media: {
audio: {
enabled: true,
baseUrl: "https://config.example",
headers: { "X-Config": "2" },
providerOptions: {
deepgram: {
detect_language: true,
punctuate: true,
},
});
const cfg = {
models: {
providers: {
deepgram: {
baseUrl: "https://provider.example",
apiKey: "test-key",
headers: { "X-Provider": "1" },
models: [],
},
deepgram: { smartFormat: true },
models: [
{
provider: "deepgram",
model: "nova-3",
baseUrl: "https://entry.example",
headers: { "X-Entry": "3" },
providerOptions: {
deepgram: {
detectLanguage: false,
punctuate: false,
smart_format: true,
},
},
},
tools: {
media: {
audio: {
enabled: true,
baseUrl: "https://config.example",
headers: { "X-Config": "2" },
providerOptions: {
deepgram: {
detect_language: true,
punctuate: true,
},
},
],
deepgram: { smartFormat: true },
models: [
{
provider: "deepgram",
model: "nova-3",
baseUrl: "https://entry.example",
headers: { "X-Entry": "3" },
providerOptions: {
deepgram: {
detectLanguage: false,
punctuate: false,
smart_format: true,
},
},
},
],
},
},
},
},
} as unknown as OpenClawConfig;
} as unknown as OpenClawConfig;
try {
const result = await runCapability({
capability: "audio",
cfg,
@@ -105,9 +122,6 @@ describe("runCapability deepgram provider options", () => {
smart_format: true,
});
expect((seenQuery as Record<string, unknown>)["detectLanguage"]).toBeUndefined();
} finally {
await cache.cleanup();
await fs.unlink(tmpPath).catch(() => {});
}
});
});
});

View File

@@ -7,8 +7,8 @@ import { getMemorySearchManager, type MemoryIndexManager } from "./index.js";
import { createOpenAIEmbeddingProviderMock } from "./test-embeddings-mock.js";
import { createMemoryManagerOrThrow } from "./test-manager.js";
const embedBatch = vi.fn(async (_input: string[]) => [] as number[][]);
const embedQuery = vi.fn(async (_input: string) => [0.2, 0.2, 0.2] as number[]);
const embedBatch = vi.fn(async (_input: string[]): Promise<number[][]> => []);
const embedQuery = vi.fn(async (_input: string): Promise<number[]> => [0.2, 0.2, 0.2]);
vi.mock("./embeddings.js", () => ({
createEmbeddingProvider: async (_options: unknown) =>
@@ -61,7 +61,6 @@ describe("memory search async sync", () => {
it("does not await sync when searching", async () => {
const cfg = buildConfig();
manager = await createMemoryManagerOrThrow(cfg);
const pending = new Promise<void>(() => {});
@@ -78,9 +77,9 @@ describe("memory search async sync", () => {
it("waits for in-flight search sync during close", async () => {
const cfg = buildConfig();
let releaseSync!: (value?: void) => void;
let releaseSync = () => {};
const syncGate = new Promise<void>((resolve) => {
releaseSync = resolve;
releaseSync = () => resolve();
});
embedBatch.mockImplementation(async (input: string[]) => {
await syncGate;

View File

@@ -16,21 +16,6 @@ const ENVELOPE_CHANNELS = [
];
const MESSAGE_ID_LINE = /^\s*\[message_id:\s*[^\]]+\]\s*$/i;
// Literal header lines that introduce an inbound metadata block. Each one is
// matched verbatim (after regex escaping) by INBOUND_METADATA_PREFIX_RE below.
const INBOUND_METADATA_HEADERS = [
"Conversation info (untrusted metadata):",
"Sender (untrusted metadata):",
"Thread starter (untrusted, for context):",
"Replied message (untrusted, for context):",
"Forwarded message context (untrusted metadata):",
"Chat history since last reply (untrusted, for context):",
];
// Matches every character that has special meaning inside a regular
// expression, so header text can be embedded in a pattern as a literal.
const REGEX_ESCAPE_RE = /[.*+?^${}()|[\]\\-]/g;
// Anchored at the start of the input: optional leading whitespace, one of the
// known headers, a line break, a ```json fenced block (body matched lazily up
// to the first closing fence), and any line breaks that follow the fence.
// Being anchored with `^` and having no flags, it only matches a block that is
// a prefix of the text.
const INBOUND_METADATA_PREFIX_RE = new RegExp(
"^\\s*(?:" +
INBOUND_METADATA_HEADERS.map((header) => header.replace(REGEX_ESCAPE_RE, "\\$&")).join("|") +
")\\r?\\n```json\\r?\\n[\\s\\S]*?\\r?\\n```(?:\\r?\\n)*",
);
function looksLikeEnvelopeHeader(header: string): boolean {
if (/\d{4}-\d{2}-\d{2}T\d{2}:\d{2}Z\b/.test(header)) {
return true;
@@ -61,15 +46,3 @@ export function stripMessageIdHints(text: string): string {
const filtered = lines.filter((line) => !MESSAGE_ID_LINE.test(line));
return filtered.length === lines.length ? text : filtered.join("\n");
}
/**
 * Removes every inbound metadata block that appears at the start of `text`.
 *
 * Blocks are peeled off one at a time for as long as the remaining text still
 * begins with one; blocks that are not a prefix are left alone. The result is
 * trimmed of surrounding whitespace.
 *
 * @param text - Raw message text that may start with metadata blocks.
 * @returns The text with leading metadata blocks removed, trimmed.
 */
export function stripInboundMetadataBlocks(text: string): string {
  let rest = text;
  // The pattern carries no `g`/`y` flag, so each exec() call starts at index 0.
  let found = INBOUND_METADATA_PREFIX_RE.exec(rest);
  while (found !== null) {
    const afterBlock = rest.slice(found[0].length);
    rest = afterBlock.replace(/^\r?\n+/, "");
    found = INBOUND_METADATA_PREFIX_RE.exec(rest);
  }
  return rest.trim();
}

View File

@@ -1,5 +1,5 @@
import { formatRawAssistantErrorForUi } from "../agents/pi-embedded-helpers.js";
import { stripInboundMetadataBlocks } from "../shared/chat-envelope.js";
import { stripInboundMetadata } from "../auto-reply/reply/strip-inbound-meta.js";
import { stripAnsi } from "../terminal/ansi.js";
import { formatTokenCount } from "../utils/usage-format.js";
@@ -275,7 +275,7 @@ export function extractTextFromMessage(
const text = extractTextBlocks(record.content, opts);
if (text) {
if (record.role === "user") {
return stripInboundMetadataBlocks(text);
return stripInboundMetadata(text);
}
return text;
}