fix(security): harden tar archive extraction to match zip safety checks

This commit is contained in:
Peter Steinberger
2026-03-02 16:36:46 +00:00
parent 17ede52a4b
commit 0dbb92dd2b
4 changed files with 277 additions and 80 deletions

View File

@@ -1,15 +1,14 @@
import { createHash } from "node:crypto";
import fs from "node:fs";
import path from "node:path";
import { Readable } from "node:stream";
import { pipeline } from "node:stream/promises";
import type { ReadableStream as NodeReadableStream } from "node:stream/web";
import { isWindowsDrivePath } from "../infra/archive-path.js";
import {
isWindowsDrivePath,
resolveArchiveOutputPath,
stripArchivePath,
validateArchiveEntryPath,
} from "../infra/archive-path.js";
import { extractArchive as extractArchiveSafe } from "../infra/archive.js";
createTarEntrySafetyChecker,
extractArchive as extractArchiveSafe,
} from "../infra/archive.js";
import { fetchWithSsrFGuard } from "../infra/net/fetch-guard.js";
import { isWithinDir } from "../infra/path-safety.js";
import { runCommandWithTimeout } from "../process/exec.js";
@@ -63,6 +62,101 @@ function resolveArchiveType(spec: SkillInstallSpec, filename: string): string |
return undefined;
}
// Month abbreviations used by BSD-style `tar tvf` listings (e.g. "Jan 1 00:00").
const TAR_VERBOSE_MONTHS = new Set([
  "Jan",
  "Feb",
  "Mar",
  "Apr",
  "May",
  "Jun",
  "Jul",
  "Aug",
  "Sep",
  "Oct",
  "Nov",
  "Dec",
]);
// GNU-style `tar tvf` listings carry an ISO date instead (e.g. "2024-01-01 00:00").
const ISO_DATE_PATTERN = /^\d{4}-\d{2}-\d{2}$/;
// Maps the leading mode character of a `tar tvf` line to the entry-type names
// used by the safety checker. Unknown characters (including "-") default to "File".
const TAR_VERBOSE_TYPE_BY_CHAR: Record<string, string> = {
  l: "SymbolicLink",
  h: "Link",
  b: "BlockDevice",
  c: "CharacterDevice",
  p: "FIFO",
  s: "Socket",
  d: "Directory",
};
function mapTarVerboseTypeChar(typeChar: string): string {
  return TAR_VERBOSE_TYPE_BY_CHAR[typeChar] ?? "File";
}
/**
 * Extracts the entry size from one `tar tvf` output line.
 *
 * The size is the whitespace-separated token immediately preceding the date,
 * where the date is either a BSD month abbreviation or a GNU ISO date.
 *
 * @throws Error when the line has too few tokens, no recognizable date token,
 *   or the token before the date is not a non-negative integer.
 */
function parseTarVerboseSize(line: string): number {
  const tokens = line.trim().split(/\s+/).filter(Boolean);
  if (tokens.length < 6) {
    throw new Error(`unable to parse tar verbose metadata: ${line}`);
  }
  // Shared size extraction for both date formats: the size token sits directly
  // before the date token, and must parse as a non-negative integer.
  const sizeBeforeIndex = (dateIndex: number): number => {
    const size = Number.parseInt(tokens[dateIndex - 1] ?? "", 10);
    if (!Number.isFinite(size) || size < 0) {
      throw new Error(`unable to parse tar entry size: ${line}`);
    }
    return size;
  };
  // dateIndex must be > 0 so a size token exists before it.
  const monthIndex = tokens.findIndex((token) => TAR_VERBOSE_MONTHS.has(token));
  if (monthIndex > 0) {
    return sizeBeforeIndex(monthIndex);
  }
  const isoIndex = tokens.findIndex((token) => ISO_DATE_PATTERN.test(token));
  if (isoIndex > 0) {
    return sizeBeforeIndex(isoIndex);
  }
  throw new Error(`unable to parse tar verbose metadata: ${line}`);
}
/**
 * Parses the full `tar tvf` stdout into per-entry type/size metadata.
 * Blank lines are ignored; every remaining line must start with a mode
 * character and contain a parseable size token.
 */
function parseTarVerboseMetadata(stdout: string): Array<{ type: string; size: number }> {
  const entries: Array<{ type: string; size: number }> = [];
  for (const rawLine of stdout.split("\n")) {
    const line = rawLine.trim();
    if (!line) {
      continue;
    }
    const typeChar = line.charAt(0);
    if (!typeChar) {
      throw new Error("unable to parse tar entry type");
    }
    entries.push({
      type: mapTarVerboseTypeChar(typeChar),
      size: parseTarVerboseSize(line),
    });
  }
  return entries;
}
/**
 * Computes the SHA-256 digest (lowercase hex) of a file by streaming it,
 * so large archives are never fully buffered in memory.
 *
 * @throws Whatever the read stream emits (e.g. ENOENT) via pipeline rejection.
 */
async function hashFileSha256(filePath: string): Promise<string> {
  const hash = createHash("sha256");
  // pipeline handles error propagation and cleanup; Hash is a Transform, so it
  // is a valid pipeline destination. This replaces the manual Promise/listener
  // wiring (and its `as Buffer` cast) with the idiomatic stream/promises form.
  await pipeline(fs.createReadStream(filePath), hash);
  return hash.digest("hex");
}
async function downloadFile(
url: string,
destPath: string,
@@ -132,6 +226,8 @@ async function extractArchive(params: {
return { stdout: "", stderr: "tar not found on PATH", code: null };
}
const preflightHash = await hashFileSha256(archivePath);
// Preflight list to prevent zip-slip style traversal before extraction.
const listResult = await runCommandWithTimeout(["tar", "tf", archivePath], { timeoutMs });
if (listResult.code !== 0) {
@@ -154,34 +250,43 @@ async function extractArchive(params: {
code: verboseResult.code,
};
}
for (const line of verboseResult.stdout.split("\n")) {
const trimmed = line.trim();
if (!trimmed) {
continue;
}
const typeChar = trimmed[0];
if (typeChar === "l" || typeChar === "h" || trimmed.includes(" -> ")) {
const metadata = parseTarVerboseMetadata(verboseResult.stdout);
if (metadata.length !== entries.length) {
return {
stdout: verboseResult.stdout,
stderr: `tar verbose/list entry count mismatch (${metadata.length} vs ${entries.length})`,
code: 1,
};
}
const checkTarEntrySafety = createTarEntrySafetyChecker({
rootDir: targetDir,
stripComponents: strip,
escapeLabel: "targetDir",
});
for (let i = 0; i < entries.length; i += 1) {
const entryPath = entries[i];
const entryMeta = metadata[i];
if (!entryPath || !entryMeta) {
return {
stdout: verboseResult.stdout,
stderr: "tar archive contains link entries; refusing to extract for safety",
stderr: "tar metadata parse failure",
code: 1,
};
}
checkTarEntrySafety({
path: entryPath,
type: entryMeta.type,
size: entryMeta.size,
});
}
for (const entry of entries) {
validateArchiveEntryPath(entry, { escapeLabel: "targetDir" });
const relPath = stripArchivePath(entry, strip);
if (!relPath) {
continue;
}
validateArchiveEntryPath(relPath, { escapeLabel: "targetDir" });
resolveArchiveOutputPath({
rootDir: targetDir,
relPath,
originalPath: entry,
escapeLabel: "targetDir",
});
const postPreflightHash = await hashFileSha256(archivePath);
if (postPreflightHash !== preflightHash) {
return {
stdout: "",
stderr: "tar archive changed during safety preflight; refusing to extract",
code: 1,
};
}
const argv = ["tar", "xf", archivePath, "-C", targetDir];

View File

@@ -260,13 +260,35 @@ describe("installDownloadSpec extraction safety (tar.bz2)", () => {
label: "rejects archives containing symlinks",
name: "tbz2-symlink",
url: "https://example.invalid/evil.tbz2",
listOutput: "link\nlink/pwned.txt\n",
listOutput: "link\n",
verboseListOutput: "lrwxr-xr-x 0 0 0 0 Jan 1 00:00 link -> ../outside\n",
extract: "reject" as const,
expectedOk: false,
expectedExtract: false,
expectedStderrSubstring: "link",
},
{
label: "rejects archives containing FIFO entries",
name: "tbz2-fifo",
url: "https://example.invalid/evil.tbz2",
listOutput: "evil-fifo\n",
verboseListOutput: "prw-r--r-- 0 0 0 0 Jan 1 00:00 evil-fifo\n",
extract: "reject" as const,
expectedOk: false,
expectedExtract: false,
expectedStderrSubstring: "link",
},
{
label: "rejects oversized extracted entries",
name: "tbz2-oversized",
url: "https://example.invalid/oversized.tbz2",
listOutput: "big.bin\n",
verboseListOutput: "-rw-r--r-- 0 0 0 314572800 Jan 1 00:00 big.bin\n",
extract: "reject" as const,
expectedOk: false,
expectedExtract: false,
expectedStderrSubstring: "archive entry extracted size exceeds limit",
},
{
label: "extracts safe archives with stripComponents",
name: "tbz2-ok",
@@ -322,4 +344,44 @@ describe("installDownloadSpec extraction safety (tar.bz2)", () => {
}
}
});
it("rejects tar.bz2 archives that change after preflight", async () => {
const entry = buildEntry("tbz2-preflight-change");
const targetDir = path.join(resolveSkillToolsRootDir(entry), "target");
const commandCallCount = runCommandWithTimeoutMock.mock.calls.length;
mockArchiveResponse(new Uint8Array([1, 2, 3]));
runCommandWithTimeoutMock.mockImplementation(async (...argv: unknown[]) => {
const cmd = (argv[0] ?? []) as string[];
if (cmd[0] === "tar" && cmd[1] === "tf") {
return runCommandResult({ stdout: "package/hello.txt\n" });
}
if (cmd[0] === "tar" && cmd[1] === "tvf") {
const archivePath = String(cmd[2] ?? "");
if (archivePath) {
await fs.appendFile(archivePath, "mutated");
}
return runCommandResult({ stdout: "-rw-r--r-- 0 0 0 0 Jan 1 00:00 package/hello.txt\n" });
}
if (cmd[0] === "tar" && cmd[1] === "xf") {
throw new Error("should not extract");
}
return runCommandResult();
});
const result = await installDownloadSkill({
name: "tbz2-preflight-change",
url: "https://example.invalid/change.tbz2",
archive: "tar.bz2",
targetDir,
});
expect(result.ok).toBe(false);
expect(result.stderr).toContain("changed during safety preflight");
const extractionAttempted = runCommandWithTimeoutMock.mock.calls
.slice(commandCallCount)
.some((call) => (call[0] as string[])[1] === "xf");
expect(extractionAttempted).toBe(false);
});
});

View File

@@ -176,23 +176,30 @@ describe("archive utils", () => {
},
);
it("rejects archives that exceed archive size budget", async () => {
await withArchiveCase("zip", async ({ archivePath, extractDir }) => {
const zip = new JSZip();
zip.file("package/file.txt", "ok");
await fs.writeFile(archivePath, await zip.generateAsync({ type: "nodebuffer" }));
const stat = await fs.stat(archivePath);
await expect(
extractArchive({
it.each([{ ext: "zip" as const }, { ext: "tar" as const }])(
"rejects $ext archives that exceed archive size budget",
async ({ ext }) => {
await withArchiveCase(ext, async ({ workDir, archivePath, extractDir }) => {
await writePackageArchive({
ext,
workDir,
archivePath,
destDir: extractDir,
timeoutMs: 5_000,
limits: { maxArchiveBytes: Math.max(1, stat.size - 1) },
}),
).rejects.toThrow("archive size exceeds limit");
});
});
fileName: "file.txt",
content: "ok",
});
const stat = await fs.stat(archivePath);
await expect(
extractArchive({
archivePath,
destDir: extractDir,
timeoutMs: 5_000,
limits: { maxArchiveBytes: Math.max(1, stat.size - 1) },
}),
).rejects.toThrow("archive size exceeds limit");
});
},
);
it("fails resolvePackedRootDir when extract dir has multiple root dirs", async () => {
const workDir = await makeTempDir("packed-root");

View File

@@ -21,8 +21,7 @@ export type ArchiveLogger = {
export type ArchiveExtractLimits = {
/**
* Max archive file bytes (compressed). Primarily protects zip extraction
* because we currently read the whole archive into memory for parsing.
* Max archive file bytes (compressed).
*/
maxArchiveBytes?: number;
/** Max number of extracted entries (files + dirs). */
@@ -457,7 +456,16 @@ async function extractZip(params: {
}
}
type TarEntryInfo = { path: string; type: string; size: number };
export type TarEntryInfo = { path: string; type: string; size: number };
// Tar entry types that are never extracted: links can escape the target
// directory or alias files outside it, and device/FIFO/socket nodes are
// unsafe to materialize on disk. Only "File" and "Directory" pass through.
const BLOCKED_TAR_ENTRY_TYPES = new Set([
  "SymbolicLink",
  "Link",
  "BlockDevice",
  "CharacterDevice",
  "FIFO",
  "Socket",
]);
function readTarEntryInfo(entry: unknown): TarEntryInfo {
const p =
@@ -479,6 +487,42 @@ function readTarEntryInfo(entry: unknown): TarEntryInfo {
return { path: p, type: t, size: s };
}
/**
 * Builds a stateful per-entry safety checker shared by the in-process tar
 * extractor and the external `tar` preflight path.
 *
 * The returned closure throws on: path traversal/escape (before and after
 * component stripping), blocked entry types (links, device nodes, FIFOs,
 * sockets), exceeding the entry-count limit, or exceeding the byte budget.
 * Entries fully removed by stripping are skipped without counting.
 */
export function createTarEntrySafetyChecker(params: {
  rootDir: string;
  stripComponents?: number;
  limits?: ArchiveExtractLimits;
  escapeLabel?: string;
}): (entry: TarEntryInfo) => void {
  const stripCount = Math.max(0, Math.floor(params.stripComponents ?? 0));
  const effectiveLimits = resolveExtractLimits(params.limits);
  const byteBudget = createByteBudgetTracker(effectiveLimits);
  let seenEntries = 0;
  return (entry: TarEntryInfo) => {
    // Check the raw archive path first so absolute or traversal paths are
    // rejected even when stripping would later discard them.
    validateArchiveEntryPath(entry.path, { escapeLabel: params.escapeLabel });
    const stripped = stripArchivePath(entry.path, stripCount);
    if (!stripped) {
      // Entry entirely consumed by stripComponents — nothing would be written.
      return;
    }
    validateArchiveEntryPath(stripped, { escapeLabel: params.escapeLabel });
    resolveArchiveOutputPath({
      rootDir: params.rootDir,
      relPath: stripped,
      originalPath: entry.path,
      escapeLabel: params.escapeLabel,
    });
    if (BLOCKED_TAR_ENTRY_TYPES.has(entry.type)) {
      throw new Error(`tar entry is a link: ${entry.path}`);
    }
    seenEntries += 1;
    assertArchiveEntryCountWithinLimit(seenEntries, effectiveLimits);
    byteBudget.addEntrySize(entry.size);
  };
}
export async function extractArchive(params: {
archivePath: string;
destDir: string;
@@ -496,49 +540,28 @@ export async function extractArchive(params: {
const label = kind === "zip" ? "extract zip" : "extract tar";
if (kind === "tar") {
const strip = Math.max(0, Math.floor(params.stripComponents ?? 0));
const limits = resolveExtractLimits(params.limits);
let entryCount = 0;
const budget = createByteBudgetTracker(limits);
const stat = await fs.stat(params.archivePath);
if (stat.size > limits.maxArchiveBytes) {
throw new Error(ERROR_ARCHIVE_SIZE_EXCEEDS_LIMIT);
}
const checkTarEntrySafety = createTarEntrySafetyChecker({
rootDir: params.destDir,
stripComponents: params.stripComponents,
limits,
});
await withTimeout(
tar.x({
file: params.archivePath,
cwd: params.destDir,
strip,
strip: Math.max(0, Math.floor(params.stripComponents ?? 0)),
gzip: params.tarGzip,
preservePaths: false,
strict: true,
onReadEntry(entry) {
const info = readTarEntryInfo(entry);
try {
validateArchiveEntryPath(info.path);
const relPath = stripArchivePath(info.path, strip);
if (!relPath) {
return;
}
validateArchiveEntryPath(relPath);
resolveArchiveOutputPath({
rootDir: params.destDir,
relPath,
originalPath: info.path,
});
if (
info.type === "SymbolicLink" ||
info.type === "Link" ||
info.type === "BlockDevice" ||
info.type === "CharacterDevice" ||
info.type === "FIFO" ||
info.type === "Socket"
) {
throw new Error(`tar entry is a link: ${info.path}`);
}
entryCount += 1;
assertArchiveEntryCountWithinLimit(entryCount, limits);
budget.addEntrySize(info.size);
checkTarEntrySafety(readTarEntryInfo(entry));
} catch (err) {
const error = err instanceof Error ? err : new Error(String(err));
// Node's EventEmitter calls listeners with `this` bound to the