From d3ee5deb87ee2ad0ab83c92c365611165423cb71 Mon Sep 17 00:00:00 2001 From: Peter Steinberger Date: Sat, 14 Feb 2026 15:30:05 +0100 Subject: [PATCH] fix(archive): enforce extraction resource limits --- CHANGELOG.md | 1 + src/infra/archive.test.ts | 41 ++++++++ src/infra/archive.ts | 197 ++++++++++++++++++++++++++++++-------- 3 files changed, 200 insertions(+), 39 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 3e0892a6f85..6f867bc4ab5 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -13,6 +13,7 @@ Docs: https://docs.openclaw.ai - Security: fix Chutes manual OAuth login state validation (thanks @aether-ai-agent). (#16058) - macOS: hard-limit unkeyed `openclaw://agent` deep links and ignore `deliver` / `to` / `channel` unless a valid unattended key is provided. Thanks @Cillian-Collins. - Security/Google Chat: deprecate `users/` allowlists (treat `users/...` as immutable user id only); keep raw email allowlists for usability. Thanks @vincentkoc. +- Security/Archive: enforce archive extraction entry/size limits to prevent resource exhaustion from high-expansion ZIP/TAR archives. Thanks @vincentkoc. ## 2026.2.14 diff --git a/src/infra/archive.test.ts b/src/infra/archive.test.ts index 8a9e8471be4..970af55a29d 100644 --- a/src/infra/archive.test.ts +++ b/src/infra/archive.test.ts @@ -96,4 +96,45 @@ describe("archive utils", () => { extractArchive({ archivePath, destDir: extractDir, timeoutMs: 5_000 }), ).rejects.toThrow(/escapes destination/i); }); + + it("rejects zip archives that exceed extracted size budget", async () => { + const workDir = await makeTempDir(); + const archivePath = path.join(workDir, "bundle.zip"); + const extractDir = path.join(workDir, "extract"); + + const zip = new JSZip(); + zip.file("package/big.txt", "x".repeat(64)); + await fs.writeFile(archivePath, await zip.generateAsync({ type: "nodebuffer" })); + + await fs.mkdir(extractDir, { recursive: true }); + await expect( + extractArchive({ + archivePath, + destDir: extractDir, + timeoutMs: 5_000, + limits: { maxExtractedBytes: 32 }, + }), + ).rejects.toThrow("archive extracted size exceeds limit"); + }); + + it("rejects tar archives that exceed extracted size budget", async () => { + const workDir = await makeTempDir(); + const archivePath = path.join(workDir, "bundle.tar"); + const extractDir = path.join(workDir, "extract"); + const packageDir = path.join(workDir, "package"); + + await fs.mkdir(packageDir, { recursive: true }); + await fs.writeFile(path.join(packageDir, "big.txt"), "x".repeat(64)); + await tar.c({ cwd: workDir, file: archivePath }, ["package"]); + + await fs.mkdir(extractDir, { recursive: true }); + await expect( + extractArchive({ + archivePath, + destDir: extractDir, + timeoutMs: 5_000, + limits: { maxExtractedBytes: 32 }, + }), + ).rejects.toThrow("archive extracted size exceeds limit"); + }); }); diff --git a/src/infra/archive.ts b/src/infra/archive.ts index 94266ee46b4..d9da45098c5 100644 --- a/src/infra/archive.ts +++ b/src/infra/archive.ts @@ -1,6 +1,9 @@ import JSZip from "jszip"; +import { createWriteStream } from "node:fs"; import fs from "node:fs/promises"; import path from "node:path"; +import { Transform } from "node:stream"; +import { pipeline } from "node:stream/promises"; import * as tar from "tar"; export type ArchiveKind = "tar" | "zip"; @@ -10,6 +13,20 @@ export type ArchiveLogger = { warn?: (message: string) => void; }; +export type ArchiveExtractLimits = { + /** + * Max archive file bytes (compressed). Primarily protects zip extraction + * because we currently read the whole archive into memory for parsing. + */ + maxArchiveBytes?: number; + /** Max number of extracted entries (files + dirs). */ + maxEntries?: number; + /** Max extracted bytes (sum of all files). */ + maxExtractedBytes?: number; + /** Max extracted bytes for a single file entry. */ + maxEntryBytes?: number; +}; + const TAR_SUFFIXES = [".tgz", ".tar.gz", ".tar"]; export function resolveArchiveKind(filePath: string): ArchiveKind | null { @@ -100,14 +117,14 @@ function validateArchiveEntryPath(entryPath: string): void { } function stripArchivePath(entryPath: string, stripComponents: number): string | null { - const normalized = path.posix.normalize(normalizeArchivePath(entryPath)); - if (!normalized || normalized === "." || normalized === "./") { + const raw = normalizeArchivePath(entryPath); + if (!raw || raw === "." || raw === "./") { return null; } - // Keep the validation separate so callers can reject traversal in the original - // path even if stripping could make it "look" safe. - const parts = normalized.split("/").filter((part) => part.length > 0 && part !== "."); + // Important: mimic tar --strip-components semantics (raw segments before + // normalization) so strip-induced escapes like "a/../b" are not hidden. + const parts = raw.split("/").filter((part) => part.length > 0 && part !== "."); const strip = Math.max(0, Math.floor(stripComponents)); const stripped = strip === 0 ? parts.join("/") : parts.slice(strip).join("/"); const result = path.posix.normalize(stripped); @@ -126,16 +143,63 @@ function resolveCheckedOutPath(destDir: string, relPath: string, original: strin return outPath; } +function clampLimit(value: number | undefined): number | undefined { + if (typeof value !== "number" || !Number.isFinite(value)) { + return undefined; + } + const v = Math.floor(value); + return v > 0 ? v : undefined; +} + +function resolveExtractLimits(limits?: ArchiveExtractLimits): Required { + // Defaults: defensive, but should not break normal installs. + return { + maxArchiveBytes: clampLimit(limits?.maxArchiveBytes) ?? 256 * 1024 * 1024, + maxEntries: clampLimit(limits?.maxEntries) ?? 50_000, + maxExtractedBytes: clampLimit(limits?.maxExtractedBytes) ?? 512 * 1024 * 1024, + maxEntryBytes: clampLimit(limits?.maxEntryBytes) ?? 256 * 1024 * 1024, + }; +} + +function createExtractBudgetTransform(params: { + onChunkBytes: (bytes: number) => void; +}): Transform { + return new Transform({ + transform(chunk, _encoding, callback) { + try { + const buf = chunk instanceof Buffer ? chunk : Buffer.from(chunk as Uint8Array); + params.onChunkBytes(buf.byteLength); + callback(null, buf); + } catch (err) { + callback(err instanceof Error ? err : new Error(String(err))); + } + }, + }); +} + async function extractZip(params: { archivePath: string; destDir: string; stripComponents?: number; + limits?: ArchiveExtractLimits; }): Promise { + const limits = resolveExtractLimits(params.limits); + const stat = await fs.stat(params.archivePath); + if (stat.size > limits.maxArchiveBytes) { + throw new Error("archive size exceeds limit"); + } + const buffer = await fs.readFile(params.archivePath); const zip = await JSZip.loadAsync(buffer); const entries = Object.values(zip.files); const strip = Math.max(0, Math.floor(params.stripComponents ?? 0)); + if (entries.length > limits.maxEntries) { + throw new Error("archive entry count exceeds limit"); + } + + let extractedBytes = 0; + for (const entry of entries) { validateArchiveEntryPath(entry.name); @@ -152,8 +216,38 @@ async function extractZip(params: { } await fs.mkdir(path.dirname(outPath), { recursive: true }); - const data = await entry.async("nodebuffer"); - await fs.writeFile(outPath, data); + let entryBytes = 0; + const onChunkBytes = (bytes: number) => { + entryBytes += bytes; + if (entryBytes > limits.maxEntryBytes) { + throw new Error("archive entry extracted size exceeds limit"); + } + extractedBytes += bytes; + if (extractedBytes > limits.maxExtractedBytes) { + throw new Error("archive extracted size exceeds limit"); + } + }; + + const readable = + typeof entry.nodeStream === "function" + ? (entry.nodeStream() as unknown) + : await entry.async("nodebuffer"); + + try { + if (readable instanceof Buffer) { + onChunkBytes(readable.byteLength); + await fs.writeFile(outPath, readable); + } else { + await pipeline( + readable as NodeJS.ReadableStream, + createExtractBudgetTransform({ onChunkBytes }), + createWriteStream(outPath), + ); + } + } catch (err) { + await fs.unlink(outPath).catch(() => undefined); + throw err; + } // Best-effort permission restore for zip entries created on unix. if (typeof entry.unixPermissions === "number") { @@ -172,6 +266,7 @@ export async function extractArchive(params: { kind?: ArchiveKind; stripComponents?: number; tarGzip?: boolean; + limits?: ArchiveExtractLimits; logger?: ArchiveLogger; }): Promise { const kind = params.kind ?? resolveArchiveKind(params.archivePath); @@ -182,7 +277,9 @@ export async function extractArchive(params: { const label = kind === "zip" ? "extract zip" : "extract tar"; if (kind === "tar") { const strip = Math.max(0, Math.floor(params.stripComponents ?? 0)); - let firstError: Error | undefined; + const limits = resolveExtractLimits(params.limits); + let entryCount = 0; + let extractedBytes = 0; await withTimeout( tar.x({ file: params.archivePath, @@ -191,48 +288,69 @@ export async function extractArchive(params: { gzip: params.tarGzip, preservePaths: false, strict: true, - filter: (entryPath, entry) => { + onReadEntry(entry) { + const archiveEntryPath = + typeof entry === "object" && entry !== null && "path" in entry + ? String((entry as { path: unknown }).path) + : ""; + const archiveEntryType = + typeof entry === "object" && entry !== null && "type" in entry + ? String((entry as { type: unknown }).type) + : ""; + const archiveEntrySize = + typeof entry === "object" && + entry !== null && + "size" in entry && + typeof (entry as { size?: unknown }).size === "number" && + Number.isFinite((entry as { size: number }).size) + ? Math.max(0, Math.floor((entry as { size: number }).size)) + : 0; + try { - validateArchiveEntryPath(entryPath); - // `tar`'s filter callback can pass either a ReadEntry or a Stats-ish object; - // fail closed for any link-like entries. - const entryType = - typeof entry === "object" && - entry !== null && - "type" in entry && - typeof (entry as { type?: unknown }).type === "string" - ? (entry as { type: string }).type - : undefined; - const isSymlink = - typeof entry === "object" && - entry !== null && - "isSymbolicLink" in entry && - typeof (entry as { isSymbolicLink?: unknown }).isSymbolicLink === "function" && - Boolean((entry as { isSymbolicLink: () => boolean }).isSymbolicLink()); - if (entryType === "SymbolicLink" || entryType === "Link" || isSymlink) { - throw new Error(`tar entry is a link: ${entryPath}`); - } - const relPath = stripArchivePath(entryPath, strip); + validateArchiveEntryPath(archiveEntryPath); + + const relPath = stripArchivePath(archiveEntryPath, strip); if (!relPath) { - return false; + return; } validateArchiveEntryPath(relPath); - resolveCheckedOutPath(params.destDir, relPath, entryPath); - return true; - } catch (err) { - if (!firstError) { - firstError = err instanceof Error ? err : new Error(String(err)); + resolveCheckedOutPath(params.destDir, relPath, archiveEntryPath); + + if ( + archiveEntryType === "SymbolicLink" || + archiveEntryType === "Link" || + archiveEntryType === "BlockDevice" || + archiveEntryType === "CharacterDevice" || + archiveEntryType === "FIFO" || + archiveEntryType === "Socket" + ) { + throw new Error(`tar entry is a link: ${archiveEntryPath}`); } - return false; + + entryCount += 1; + if (entryCount > limits.maxEntries) { + throw new Error("archive entry count exceeds limit"); + } + + if (archiveEntrySize > limits.maxEntryBytes) { + throw new Error("archive entry extracted size exceeds limit"); + } + extractedBytes += archiveEntrySize; + if (extractedBytes > limits.maxExtractedBytes) { + throw new Error("archive extracted size exceeds limit"); + } + } catch (err) { + const error = err instanceof Error ? err : new Error(String(err)); + // Node's EventEmitter calls listeners with `this` bound to the + // emitter (tar.Unpack), which exposes Parser.abort(). + const emitter = this as unknown as { abort?: (error: Error) => void }; + emitter.abort?.(error); } }, }), params.timeoutMs, label, ); - if (firstError) { - throw firstError; - } return; } @@ -241,6 +359,7 @@ export async function extractArchive(params: { archivePath: params.archivePath, destDir: params.destDir, stripComponents: params.stripComponents, + limits: params.limits, }), params.timeoutMs, label,