fix(discord): harden voice message media loading

This commit is contained in:
Peter Steinberger
2026-02-15 03:41:01 +01:00
parent a6f3048e44
commit 725741486f
5 changed files with 96 additions and 27 deletions

View File

@@ -33,6 +33,7 @@ Docs: https://docs.openclaw.ai
- Cron: prevent `cron list`/`cron status` from silently skipping past-due recurring jobs by using maintenance recompute semantics. (#16156) Thanks @zerone0x.
- Cron: repair missing/corrupt `nextRunAtMs` for the updated job without globally recomputing unrelated due jobs during `cron update`. (#15750)
- Discord: prefer gateway guild id when logging inbound messages so cached-miss guilds do not appear as `guild=dm`. Thanks @thewilloftheshadow.
- Discord/Security: harden voice message media loading (SSRF + allowed-local-root checks) so tool-supplied paths/URLs cannot be used to probe internal URLs or read arbitrary local files.
- TUI: use available terminal width for session name display in searchable select lists. (#16238) Thanks @robbyczgw-cla.
- TUI: refactor searchable select list description layout and add regression coverage for ANSI-highlight width bounds.
- TUI/Gateway: resolve local gateway target URL from `gateway.bind` mode (tailnet/lan) instead of hardcoded localhost so `openclaw tui` connects when gateway is non-loopback. (#16299) Thanks @cortexuvula.

View File

@@ -23,6 +23,7 @@ import {
} from "../../discord/send.js";
import { resolveDiscordChannelId } from "../../discord/targets.js";
import { withNormalizedTimestamp } from "../date-time.js";
import { assertMediaNotDataUrl } from "../sandbox-paths.js";
import {
type ActionGate,
jsonResult,
@@ -247,7 +248,7 @@ export async function handleDiscordMessagingAction(
if (asVoice) {
if (!mediaUrl) {
throw new Error(
"Voice messages require a local media file path (mediaUrl, path, or filePath).",
"Voice messages require a media file reference (mediaUrl, path, or filePath).",
);
}
if (content && content.trim()) {
@@ -255,11 +256,7 @@ export async function handleDiscordMessagingAction(
"Voice messages cannot include text content (Discord limitation). Remove the content parameter.",
);
}
if (mediaUrl.startsWith("http://") || mediaUrl.startsWith("https://")) {
throw new Error(
"Voice messages require a local file path, not a URL. Download the file first.",
);
}
assertMediaNotDataUrl(mediaUrl);
const result = await sendVoiceMessageDiscord(to, mediaUrl, {
...(accountId ? { accountId } : {}),
replyTo,

View File

@@ -1,7 +1,9 @@
import type { RequestClient } from "@buape/carbon";
import type { APIChannel } from "discord-api-types/v10";
import { ChannelType, Routes } from "discord-api-types/v10";
import crypto from "node:crypto";
import fs from "node:fs/promises";
import path from "node:path";
import type { RetryConfig } from "../infra/retry.js";
import type { PollInput } from "../polls.js";
import type { DiscordSendResult } from "./send.types.js";
@@ -9,7 +11,11 @@ import { resolveChunkMode } from "../auto-reply/chunk.js";
import { loadConfig } from "../config/config.js";
import { resolveMarkdownTableMode } from "../config/markdown-tables.js";
import { recordChannelActivity } from "../infra/channel-activity.js";
import { resolvePreferredOpenClawTmpDir } from "../infra/tmp-openclaw-dir.js";
import { convertMarkdownTables } from "../markdown/tables.js";
import { maxBytesForKind } from "../media/constants.js";
import { extensionForMime } from "../media/mime.js";
import { loadWebMediaRaw } from "../web/media.js";
import { resolveDiscordAccount } from "./accounts.js";
import {
buildDiscordSendError,
@@ -306,6 +312,19 @@ type VoiceMessageOpts = {
silent?: boolean;
};
async function materializeVoiceMessageInput(mediaUrl: string): Promise<{ filePath: string }> {
// Security: reuse the standard media loader so we apply SSRF guards + allowed-local-root checks.
// Then write to a private temp file so ffmpeg/ffprobe never sees the original URL/path string.
const media = await loadWebMediaRaw(mediaUrl, maxBytesForKind("audio"));
const extFromName = media.fileName ? path.extname(media.fileName) : "";
const extFromMime = media.contentType ? extensionForMime(media.contentType) : "";
const ext = extFromName || extFromMime || ".bin";
const tempDir = resolvePreferredOpenClawTmpDir();
const filePath = path.join(tempDir, `voice-src-${crypto.randomUUID()}${ext}`);
await fs.writeFile(filePath, media.buffer, { mode: 0o600 });
return { filePath };
}
/**
* Send a voice message to Discord.
*
@@ -321,19 +340,31 @@ export async function sendVoiceMessageDiscord(
audioPath: string,
opts: VoiceMessageOpts = {},
): Promise<DiscordSendResult> {
const cfg = loadConfig();
const accountInfo = resolveDiscordAccount({
cfg,
accountId: opts.accountId,
});
const { token, rest, request } = createDiscordClient(opts, cfg);
const recipient = await parseAndResolveRecipient(to, opts.accountId);
const { channelId } = await resolveChannelId(rest, recipient, request);
// Convert to OGG/Opus if needed
const { path: oggPath, cleanup } = await ensureOggOpus(audioPath);
const { filePath: localInputPath } = await materializeVoiceMessageInput(audioPath);
let oggPath: string | null = null;
let oggCleanup = false;
let token: string | undefined;
let rest: RequestClient | undefined;
let channelId: string | undefined;
try {
const cfg = loadConfig();
const accountInfo = resolveDiscordAccount({
cfg,
accountId: opts.accountId,
});
const client = createDiscordClient(opts, cfg);
token = client.token;
rest = client.rest;
const request = client.request;
const recipient = await parseAndResolveRecipient(to, opts.accountId);
channelId = (await resolveChannelId(rest, recipient, request)).channelId;
// Convert to OGG/Opus if needed
const ogg = await ensureOggOpus(localInputPath);
oggPath = ogg.path;
oggCleanup = ogg.cleanup;
// Get voice message metadata (duration and waveform)
const metadata = await getVoiceMessageMetadata(oggPath);
@@ -362,20 +393,28 @@ export async function sendVoiceMessageDiscord(
channelId: String(result.channel_id ?? channelId),
};
} catch (err) {
throw await buildDiscordSendError(err, {
channelId,
rest,
token,
hasMedia: true,
});
if (channelId && rest && token) {
throw await buildDiscordSendError(err, {
channelId,
rest,
token,
hasMedia: true,
});
}
throw err;
} finally {
// Clean up temporary OGG file if we created one
if (cleanup) {
if (oggCleanup && oggPath) {
try {
await fs.unlink(oggPath);
} catch {
// Ignore cleanup errors
}
}
try {
await fs.unlink(localInputPath);
} catch {
// Ignore cleanup errors
}
}
}

View File

@@ -0,0 +1,24 @@
import path from "node:path";
import { describe, expect, it } from "vitest";
import { sendVoiceMessageDiscord } from "./send.js";
describe("sendVoiceMessageDiscord - media hardening", () => {
it("rejects local paths outside allowed media roots (prevents local file exfiltration)", async () => {
const candidate = path.join(process.cwd(), "package.json");
await expect(sendVoiceMessageDiscord("channel:123", candidate)).rejects.toThrow(
/Local media path is not under an allowed directory/,
);
});
it("blocks SSRF targets when given a private-network URL", async () => {
await expect(
sendVoiceMessageDiscord("channel:123", "http://127.0.0.1/voice.ogg"),
).rejects.toThrow(/Failed to fetch media|Blocked/);
});
it("does not allow non-http URL schemes to reach ffmpeg/ffprobe", async () => {
await expect(
sendVoiceMessageDiscord("channel:123", "rtsp://example.com/voice.ogg"),
).rejects.toThrow(/Local media path is not under an allowed directory|ENOENT|no such file/i);
});
});

View File

@@ -14,10 +14,10 @@ import type { RequestClient } from "@buape/carbon";
import { execFile } from "node:child_process";
import crypto from "node:crypto";
import fs from "node:fs/promises";
import os from "node:os";
import path from "node:path";
import { promisify } from "node:util";
import type { RetryRunner } from "../infra/retry-policy.js";
import { resolvePreferredOpenClawTmpDir } from "../infra/tmp-openclaw-dir.js";
const execFileAsync = promisify(execFile);
@@ -73,7 +73,7 @@ export async function generateWaveform(filePath: string): Promise<string> {
* Generate waveform by extracting raw PCM data and sampling amplitudes
*/
async function generateWaveformFromPcm(filePath: string): Promise<string> {
const tempDir = os.tmpdir();
const tempDir = resolvePreferredOpenClawTmpDir();
const tempPcm = path.join(tempDir, `waveform-${crypto.randomUUID()}.raw`);
try {
@@ -148,6 +148,14 @@ function generatePlaceholderWaveform(): string {
* Returns path to the OGG file (may be same as input if already OGG/Opus)
*/
export async function ensureOggOpus(filePath: string): Promise<{ path: string; cleanup: boolean }> {
const trimmed = filePath.trim();
// Defense-in-depth: callers should never hand ffmpeg/ffprobe a URL/protocol path.
if (/^[a-z][a-z0-9+.-]*:\/\//i.test(trimmed)) {
throw new Error(
`Voice message conversion requires a local file path; received a URL/protocol source: ${trimmed}`,
);
}
const ext = path.extname(filePath).toLowerCase();
// Check if already OGG
@@ -174,7 +182,7 @@ export async function ensureOggOpus(filePath: string): Promise<{ path: string; c
}
// Convert to OGG/Opus
const tempDir = os.tmpdir();
const tempDir = resolvePreferredOpenClawTmpDir();
const outputPath = path.join(tempDir, `voice-${crypto.randomUUID()}.ogg`);
await execFileAsync("ffmpeg", [