mirror of
https://github.com/openclaw/openclaw.git
synced 2026-03-30 06:14:23 +00:00
fix(security): harden tar archive extraction parity
This commit is contained in:
@@ -1,15 +1,14 @@
|
||||
import { createHash } from "node:crypto";
|
||||
import fs from "node:fs";
|
||||
import path from "node:path";
|
||||
import { Readable } from "node:stream";
|
||||
import { pipeline } from "node:stream/promises";
|
||||
import type { ReadableStream as NodeReadableStream } from "node:stream/web";
|
||||
import { isWindowsDrivePath } from "../infra/archive-path.js";
|
||||
import {
|
||||
isWindowsDrivePath,
|
||||
resolveArchiveOutputPath,
|
||||
stripArchivePath,
|
||||
validateArchiveEntryPath,
|
||||
} from "../infra/archive-path.js";
|
||||
import { extractArchive as extractArchiveSafe } from "../infra/archive.js";
|
||||
createTarEntrySafetyChecker,
|
||||
extractArchive as extractArchiveSafe,
|
||||
} from "../infra/archive.js";
|
||||
import { fetchWithSsrFGuard } from "../infra/net/fetch-guard.js";
|
||||
import { isWithinDir } from "../infra/path-safety.js";
|
||||
import { runCommandWithTimeout } from "../process/exec.js";
|
||||
@@ -63,6 +62,101 @@ function resolveArchiveType(spec: SkillInstallSpec, filename: string): string |
|
||||
return undefined;
|
||||
}
|
||||
|
||||
const TAR_VERBOSE_MONTHS = new Set([
|
||||
"Jan",
|
||||
"Feb",
|
||||
"Mar",
|
||||
"Apr",
|
||||
"May",
|
||||
"Jun",
|
||||
"Jul",
|
||||
"Aug",
|
||||
"Sep",
|
||||
"Oct",
|
||||
"Nov",
|
||||
"Dec",
|
||||
]);
|
||||
const ISO_DATE_PATTERN = /^\d{4}-\d{2}-\d{2}$/;
|
||||
|
||||
function mapTarVerboseTypeChar(typeChar: string): string {
|
||||
switch (typeChar) {
|
||||
case "l":
|
||||
return "SymbolicLink";
|
||||
case "h":
|
||||
return "Link";
|
||||
case "b":
|
||||
return "BlockDevice";
|
||||
case "c":
|
||||
return "CharacterDevice";
|
||||
case "p":
|
||||
return "FIFO";
|
||||
case "s":
|
||||
return "Socket";
|
||||
case "d":
|
||||
return "Directory";
|
||||
default:
|
||||
return "File";
|
||||
}
|
||||
}
|
||||
|
||||
function parseTarVerboseSize(line: string): number {
|
||||
const tokens = line.trim().split(/\s+/).filter(Boolean);
|
||||
if (tokens.length < 6) {
|
||||
throw new Error(`unable to parse tar verbose metadata: ${line}`);
|
||||
}
|
||||
|
||||
let dateIndex = tokens.findIndex((token) => TAR_VERBOSE_MONTHS.has(token));
|
||||
if (dateIndex > 0) {
|
||||
const size = Number.parseInt(tokens[dateIndex - 1] ?? "", 10);
|
||||
if (!Number.isFinite(size) || size < 0) {
|
||||
throw new Error(`unable to parse tar entry size: ${line}`);
|
||||
}
|
||||
return size;
|
||||
}
|
||||
|
||||
dateIndex = tokens.findIndex((token) => ISO_DATE_PATTERN.test(token));
|
||||
if (dateIndex > 0) {
|
||||
const size = Number.parseInt(tokens[dateIndex - 1] ?? "", 10);
|
||||
if (!Number.isFinite(size) || size < 0) {
|
||||
throw new Error(`unable to parse tar entry size: ${line}`);
|
||||
}
|
||||
return size;
|
||||
}
|
||||
|
||||
throw new Error(`unable to parse tar verbose metadata: ${line}`);
|
||||
}
|
||||
|
||||
function parseTarVerboseMetadata(stdout: string): Array<{ type: string; size: number }> {
|
||||
const lines = stdout
|
||||
.split("\n")
|
||||
.map((line) => line.trim())
|
||||
.filter(Boolean);
|
||||
return lines.map((line) => {
|
||||
const typeChar = line[0] ?? "";
|
||||
if (!typeChar) {
|
||||
throw new Error("unable to parse tar entry type");
|
||||
}
|
||||
return {
|
||||
type: mapTarVerboseTypeChar(typeChar),
|
||||
size: parseTarVerboseSize(line),
|
||||
};
|
||||
});
|
||||
}
|
||||
|
||||
async function hashFileSha256(filePath: string): Promise<string> {
|
||||
const hash = createHash("sha256");
|
||||
const stream = fs.createReadStream(filePath);
|
||||
return await new Promise<string>((resolve, reject) => {
|
||||
stream.on("data", (chunk) => {
|
||||
hash.update(chunk as Buffer);
|
||||
});
|
||||
stream.on("error", reject);
|
||||
stream.on("end", () => {
|
||||
resolve(hash.digest("hex"));
|
||||
});
|
||||
});
|
||||
}
|
||||
|
||||
async function downloadFile(
|
||||
url: string,
|
||||
destPath: string,
|
||||
@@ -132,6 +226,8 @@ async function extractArchive(params: {
|
||||
return { stdout: "", stderr: "tar not found on PATH", code: null };
|
||||
}
|
||||
|
||||
const preflightHash = await hashFileSha256(archivePath);
|
||||
|
||||
// Preflight list to prevent zip-slip style traversal before extraction.
|
||||
const listResult = await runCommandWithTimeout(["tar", "tf", archivePath], { timeoutMs });
|
||||
if (listResult.code !== 0) {
|
||||
@@ -154,34 +250,43 @@ async function extractArchive(params: {
|
||||
code: verboseResult.code,
|
||||
};
|
||||
}
|
||||
for (const line of verboseResult.stdout.split("\n")) {
|
||||
const trimmed = line.trim();
|
||||
if (!trimmed) {
|
||||
continue;
|
||||
}
|
||||
const typeChar = trimmed[0];
|
||||
if (typeChar === "l" || typeChar === "h" || trimmed.includes(" -> ")) {
|
||||
const metadata = parseTarVerboseMetadata(verboseResult.stdout);
|
||||
if (metadata.length !== entries.length) {
|
||||
return {
|
||||
stdout: verboseResult.stdout,
|
||||
stderr: `tar verbose/list entry count mismatch (${metadata.length} vs ${entries.length})`,
|
||||
code: 1,
|
||||
};
|
||||
}
|
||||
const checkTarEntrySafety = createTarEntrySafetyChecker({
|
||||
rootDir: targetDir,
|
||||
stripComponents: strip,
|
||||
escapeLabel: "targetDir",
|
||||
});
|
||||
for (let i = 0; i < entries.length; i += 1) {
|
||||
const entryPath = entries[i];
|
||||
const entryMeta = metadata[i];
|
||||
if (!entryPath || !entryMeta) {
|
||||
return {
|
||||
stdout: verboseResult.stdout,
|
||||
stderr: "tar archive contains link entries; refusing to extract for safety",
|
||||
stderr: "tar metadata parse failure",
|
||||
code: 1,
|
||||
};
|
||||
}
|
||||
checkTarEntrySafety({
|
||||
path: entryPath,
|
||||
type: entryMeta.type,
|
||||
size: entryMeta.size,
|
||||
});
|
||||
}
|
||||
|
||||
for (const entry of entries) {
|
||||
validateArchiveEntryPath(entry, { escapeLabel: "targetDir" });
|
||||
const relPath = stripArchivePath(entry, strip);
|
||||
if (!relPath) {
|
||||
continue;
|
||||
}
|
||||
validateArchiveEntryPath(relPath, { escapeLabel: "targetDir" });
|
||||
resolveArchiveOutputPath({
|
||||
rootDir: targetDir,
|
||||
relPath,
|
||||
originalPath: entry,
|
||||
escapeLabel: "targetDir",
|
||||
});
|
||||
const postPreflightHash = await hashFileSha256(archivePath);
|
||||
if (postPreflightHash !== preflightHash) {
|
||||
return {
|
||||
stdout: "",
|
||||
stderr: "tar archive changed during safety preflight; refusing to extract",
|
||||
code: 1,
|
||||
};
|
||||
}
|
||||
|
||||
const argv = ["tar", "xf", archivePath, "-C", targetDir];
|
||||
|
||||
@@ -260,13 +260,35 @@ describe("installDownloadSpec extraction safety (tar.bz2)", () => {
|
||||
label: "rejects archives containing symlinks",
|
||||
name: "tbz2-symlink",
|
||||
url: "https://example.invalid/evil.tbz2",
|
||||
listOutput: "link\nlink/pwned.txt\n",
|
||||
listOutput: "link\n",
|
||||
verboseListOutput: "lrwxr-xr-x 0 0 0 0 Jan 1 00:00 link -> ../outside\n",
|
||||
extract: "reject" as const,
|
||||
expectedOk: false,
|
||||
expectedExtract: false,
|
||||
expectedStderrSubstring: "link",
|
||||
},
|
||||
{
|
||||
label: "rejects archives containing FIFO entries",
|
||||
name: "tbz2-fifo",
|
||||
url: "https://example.invalid/evil.tbz2",
|
||||
listOutput: "evil-fifo\n",
|
||||
verboseListOutput: "prw-r--r-- 0 0 0 0 Jan 1 00:00 evil-fifo\n",
|
||||
extract: "reject" as const,
|
||||
expectedOk: false,
|
||||
expectedExtract: false,
|
||||
expectedStderrSubstring: "link",
|
||||
},
|
||||
{
|
||||
label: "rejects oversized extracted entries",
|
||||
name: "tbz2-oversized",
|
||||
url: "https://example.invalid/oversized.tbz2",
|
||||
listOutput: "big.bin\n",
|
||||
verboseListOutput: "-rw-r--r-- 0 0 0 314572800 Jan 1 00:00 big.bin\n",
|
||||
extract: "reject" as const,
|
||||
expectedOk: false,
|
||||
expectedExtract: false,
|
||||
expectedStderrSubstring: "archive entry extracted size exceeds limit",
|
||||
},
|
||||
{
|
||||
label: "extracts safe archives with stripComponents",
|
||||
name: "tbz2-ok",
|
||||
@@ -322,4 +344,44 @@ describe("installDownloadSpec extraction safety (tar.bz2)", () => {
|
||||
}
|
||||
}
|
||||
});
|
||||
|
||||
it("rejects tar.bz2 archives that change after preflight", async () => {
|
||||
const entry = buildEntry("tbz2-preflight-change");
|
||||
const targetDir = path.join(resolveSkillToolsRootDir(entry), "target");
|
||||
const commandCallCount = runCommandWithTimeoutMock.mock.calls.length;
|
||||
|
||||
mockArchiveResponse(new Uint8Array([1, 2, 3]));
|
||||
|
||||
runCommandWithTimeoutMock.mockImplementation(async (...argv: unknown[]) => {
|
||||
const cmd = (argv[0] ?? []) as string[];
|
||||
if (cmd[0] === "tar" && cmd[1] === "tf") {
|
||||
return runCommandResult({ stdout: "package/hello.txt\n" });
|
||||
}
|
||||
if (cmd[0] === "tar" && cmd[1] === "tvf") {
|
||||
const archivePath = String(cmd[2] ?? "");
|
||||
if (archivePath) {
|
||||
await fs.appendFile(archivePath, "mutated");
|
||||
}
|
||||
return runCommandResult({ stdout: "-rw-r--r-- 0 0 0 0 Jan 1 00:00 package/hello.txt\n" });
|
||||
}
|
||||
if (cmd[0] === "tar" && cmd[1] === "xf") {
|
||||
throw new Error("should not extract");
|
||||
}
|
||||
return runCommandResult();
|
||||
});
|
||||
|
||||
const result = await installDownloadSkill({
|
||||
name: "tbz2-preflight-change",
|
||||
url: "https://example.invalid/change.tbz2",
|
||||
archive: "tar.bz2",
|
||||
targetDir,
|
||||
});
|
||||
|
||||
expect(result.ok).toBe(false);
|
||||
expect(result.stderr).toContain("changed during safety preflight");
|
||||
const extractionAttempted = runCommandWithTimeoutMock.mock.calls
|
||||
.slice(commandCallCount)
|
||||
.some((call) => (call[0] as string[])[1] === "xf");
|
||||
expect(extractionAttempted).toBe(false);
|
||||
});
|
||||
});
|
||||
|
||||
@@ -176,23 +176,30 @@ describe("archive utils", () => {
|
||||
},
|
||||
);
|
||||
|
||||
it("rejects archives that exceed archive size budget", async () => {
|
||||
await withArchiveCase("zip", async ({ archivePath, extractDir }) => {
|
||||
const zip = new JSZip();
|
||||
zip.file("package/file.txt", "ok");
|
||||
await fs.writeFile(archivePath, await zip.generateAsync({ type: "nodebuffer" }));
|
||||
const stat = await fs.stat(archivePath);
|
||||
|
||||
await expect(
|
||||
extractArchive({
|
||||
it.each([{ ext: "zip" as const }, { ext: "tar" as const }])(
|
||||
"rejects $ext archives that exceed archive size budget",
|
||||
async ({ ext }) => {
|
||||
await withArchiveCase(ext, async ({ workDir, archivePath, extractDir }) => {
|
||||
await writePackageArchive({
|
||||
ext,
|
||||
workDir,
|
||||
archivePath,
|
||||
destDir: extractDir,
|
||||
timeoutMs: 5_000,
|
||||
limits: { maxArchiveBytes: Math.max(1, stat.size - 1) },
|
||||
}),
|
||||
).rejects.toThrow("archive size exceeds limit");
|
||||
});
|
||||
});
|
||||
fileName: "file.txt",
|
||||
content: "ok",
|
||||
});
|
||||
const stat = await fs.stat(archivePath);
|
||||
|
||||
await expect(
|
||||
extractArchive({
|
||||
archivePath,
|
||||
destDir: extractDir,
|
||||
timeoutMs: 5_000,
|
||||
limits: { maxArchiveBytes: Math.max(1, stat.size - 1) },
|
||||
}),
|
||||
).rejects.toThrow("archive size exceeds limit");
|
||||
});
|
||||
},
|
||||
);
|
||||
|
||||
it("fails resolvePackedRootDir when extract dir has multiple root dirs", async () => {
|
||||
const workDir = await makeTempDir("packed-root");
|
||||
|
||||
@@ -21,8 +21,7 @@ export type ArchiveLogger = {
|
||||
|
||||
export type ArchiveExtractLimits = {
|
||||
/**
|
||||
* Max archive file bytes (compressed). Primarily protects zip extraction
|
||||
* because we currently read the whole archive into memory for parsing.
|
||||
* Max archive file bytes (compressed).
|
||||
*/
|
||||
maxArchiveBytes?: number;
|
||||
/** Max number of extracted entries (files + dirs). */
|
||||
@@ -457,7 +456,16 @@ async function extractZip(params: {
|
||||
}
|
||||
}
|
||||
|
||||
type TarEntryInfo = { path: string; type: string; size: number };
|
||||
export type TarEntryInfo = { path: string; type: string; size: number };
|
||||
|
||||
const BLOCKED_TAR_ENTRY_TYPES = new Set([
|
||||
"SymbolicLink",
|
||||
"Link",
|
||||
"BlockDevice",
|
||||
"CharacterDevice",
|
||||
"FIFO",
|
||||
"Socket",
|
||||
]);
|
||||
|
||||
function readTarEntryInfo(entry: unknown): TarEntryInfo {
|
||||
const p =
|
||||
@@ -479,6 +487,42 @@ function readTarEntryInfo(entry: unknown): TarEntryInfo {
|
||||
return { path: p, type: t, size: s };
|
||||
}
|
||||
|
||||
export function createTarEntrySafetyChecker(params: {
|
||||
rootDir: string;
|
||||
stripComponents?: number;
|
||||
limits?: ArchiveExtractLimits;
|
||||
escapeLabel?: string;
|
||||
}): (entry: TarEntryInfo) => void {
|
||||
const strip = Math.max(0, Math.floor(params.stripComponents ?? 0));
|
||||
const limits = resolveExtractLimits(params.limits);
|
||||
let entryCount = 0;
|
||||
const budget = createByteBudgetTracker(limits);
|
||||
|
||||
return (entry: TarEntryInfo) => {
|
||||
validateArchiveEntryPath(entry.path, { escapeLabel: params.escapeLabel });
|
||||
|
||||
const relPath = stripArchivePath(entry.path, strip);
|
||||
if (!relPath) {
|
||||
return;
|
||||
}
|
||||
validateArchiveEntryPath(relPath, { escapeLabel: params.escapeLabel });
|
||||
resolveArchiveOutputPath({
|
||||
rootDir: params.rootDir,
|
||||
relPath,
|
||||
originalPath: entry.path,
|
||||
escapeLabel: params.escapeLabel,
|
||||
});
|
||||
|
||||
if (BLOCKED_TAR_ENTRY_TYPES.has(entry.type)) {
|
||||
throw new Error(`tar entry is a link: ${entry.path}`);
|
||||
}
|
||||
|
||||
entryCount += 1;
|
||||
assertArchiveEntryCountWithinLimit(entryCount, limits);
|
||||
budget.addEntrySize(entry.size);
|
||||
};
|
||||
}
|
||||
|
||||
export async function extractArchive(params: {
|
||||
archivePath: string;
|
||||
destDir: string;
|
||||
@@ -496,49 +540,28 @@ export async function extractArchive(params: {
|
||||
|
||||
const label = kind === "zip" ? "extract zip" : "extract tar";
|
||||
if (kind === "tar") {
|
||||
const strip = Math.max(0, Math.floor(params.stripComponents ?? 0));
|
||||
const limits = resolveExtractLimits(params.limits);
|
||||
let entryCount = 0;
|
||||
const budget = createByteBudgetTracker(limits);
|
||||
const stat = await fs.stat(params.archivePath);
|
||||
if (stat.size > limits.maxArchiveBytes) {
|
||||
throw new Error(ERROR_ARCHIVE_SIZE_EXCEEDS_LIMIT);
|
||||
}
|
||||
|
||||
const checkTarEntrySafety = createTarEntrySafetyChecker({
|
||||
rootDir: params.destDir,
|
||||
stripComponents: params.stripComponents,
|
||||
limits,
|
||||
});
|
||||
await withTimeout(
|
||||
tar.x({
|
||||
file: params.archivePath,
|
||||
cwd: params.destDir,
|
||||
strip,
|
||||
strip: Math.max(0, Math.floor(params.stripComponents ?? 0)),
|
||||
gzip: params.tarGzip,
|
||||
preservePaths: false,
|
||||
strict: true,
|
||||
onReadEntry(entry) {
|
||||
const info = readTarEntryInfo(entry);
|
||||
|
||||
try {
|
||||
validateArchiveEntryPath(info.path);
|
||||
|
||||
const relPath = stripArchivePath(info.path, strip);
|
||||
if (!relPath) {
|
||||
return;
|
||||
}
|
||||
validateArchiveEntryPath(relPath);
|
||||
resolveArchiveOutputPath({
|
||||
rootDir: params.destDir,
|
||||
relPath,
|
||||
originalPath: info.path,
|
||||
});
|
||||
|
||||
if (
|
||||
info.type === "SymbolicLink" ||
|
||||
info.type === "Link" ||
|
||||
info.type === "BlockDevice" ||
|
||||
info.type === "CharacterDevice" ||
|
||||
info.type === "FIFO" ||
|
||||
info.type === "Socket"
|
||||
) {
|
||||
throw new Error(`tar entry is a link: ${info.path}`);
|
||||
}
|
||||
|
||||
entryCount += 1;
|
||||
assertArchiveEntryCountWithinLimit(entryCount, limits);
|
||||
budget.addEntrySize(info.size);
|
||||
checkTarEntrySafety(readTarEntryInfo(entry));
|
||||
} catch (err) {
|
||||
const error = err instanceof Error ? err : new Error(String(err));
|
||||
// Node's EventEmitter calls listeners with `this` bound to the
|
||||
|
||||
Reference in New Issue
Block a user