From d3ee5deb87ee2ad0ab83c92c365611165423cb71 Mon Sep 17 00:00:00 2001
From: Peter Steinberger <steipete@gmail.com>
Date: Sat, 14 Feb 2026 15:30:05 +0100
Subject: [PATCH] fix(archive): enforce extraction resource limits

---
 CHANGELOG.md              |   1 +
 src/infra/archive.test.ts |  41 ++++++++
 src/infra/archive.ts      | 197 ++++++++++++++++++++++++++++++--------
 3 files changed, 200 insertions(+), 39 deletions(-)
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 3e0892a6f85..6f867bc4ab5 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -13,6 +13,7 @@ Docs: https://docs.openclaw.ai
 - Security: fix Chutes manual OAuth login state validation (thanks @aether-ai-agent). (#16058)
 - macOS: hard-limit unkeyed `openclaw://agent` deep links and ignore `deliver` / `to` / `channel` unless a valid unattended key is provided. Thanks @Cillian-Collins.
 - Security/Google Chat: deprecate `users/<email>` allowlists (treat `users/...` as immutable user id only); keep raw email allowlists for usability. Thanks @vincentkoc.
+- Security/Archive: enforce archive extraction entry/size limits to prevent resource exhaustion from high-expansion ZIP/TAR archives. Thanks @vincentkoc.
 
 ## 2026.2.14
 
diff --git a/src/infra/archive.test.ts b/src/infra/archive.test.ts
index 8a9e8471be4..970af55a29d 100644
--- a/src/infra/archive.test.ts
+++ b/src/infra/archive.test.ts
@@ -96,4 +96,45 @@ describe("archive utils", () => {
       extractArchive({ archivePath, destDir: extractDir, timeoutMs: 5_000 }),
     ).rejects.toThrow(/escapes destination/i);
   });
+
+  it("rejects zip archives that exceed extracted size budget", async () => {
+    const workDir = await makeTempDir();
+    const archivePath = path.join(workDir, "bundle.zip");
+    const extractDir = path.join(workDir, "extract");
+
+    const zip = new JSZip();
+    zip.file("package/big.txt", "x".repeat(64));
+    await fs.writeFile(archivePath, await zip.generateAsync({ type: "nodebuffer" }));
+
+    await fs.mkdir(extractDir, { recursive: true });
+    await expect(
+      extractArchive({
+        archivePath,
+        destDir: extractDir,
+        timeoutMs: 5_000,
+        limits: { maxExtractedBytes: 32 },
+      }),
+    ).rejects.toThrow("archive extracted size exceeds limit");
+  });
+
+  it("rejects tar archives that exceed extracted size budget", async () => {
+    const workDir = await makeTempDir();
+    const archivePath = path.join(workDir, "bundle.tar");
+    const extractDir = path.join(workDir, "extract");
+    const packageDir = path.join(workDir, "package");
+
+    await fs.mkdir(packageDir, { recursive: true });
+    await fs.writeFile(path.join(packageDir, "big.txt"), "x".repeat(64));
+    await tar.c({ cwd: workDir, file: archivePath }, ["package"]);
+
+    await fs.mkdir(extractDir, { recursive: true });
+    await expect(
+      extractArchive({
+        archivePath,
+        destDir: extractDir,
+        timeoutMs: 5_000,
+        limits: { maxExtractedBytes: 32 },
+      }),
+    ).rejects.toThrow("archive extracted size exceeds limit");
+  });
 });
diff --git a/src/infra/archive.ts b/src/infra/archive.ts
index 94266ee46b4..d9da45098c5 100644
--- a/src/infra/archive.ts
+++ b/src/infra/archive.ts
@@ -1,6 +1,9 @@
 import JSZip from "jszip";
+import { createWriteStream } from "node:fs";
 import fs from "node:fs/promises";
 import path from "node:path";
+import { Transform } from "node:stream";
+import { pipeline } from "node:stream/promises";
 import * as tar from "tar";
 
 export type ArchiveKind = "tar" | "zip";
@@ -10,6 +13,20 @@ export type ArchiveLogger = {
   warn?: (message: string) => void;
 };
 
+export type ArchiveExtractLimits = {
+  /**
+   * Max archive file bytes (compressed). Primarily protects zip extraction
+   * because we currently read the whole archive into memory for parsing.
+   */
+  maxArchiveBytes?: number;
+  /** Max number of extracted entries (files + dirs). */
+  maxEntries?: number;
+  /** Max extracted bytes (sum of all files). */
+  maxExtractedBytes?: number;
+  /** Max extracted bytes for a single file entry. */
+  maxEntryBytes?: number;
+};
+
 const TAR_SUFFIXES = [".tgz", ".tar.gz", ".tar"];
 
 export function resolveArchiveKind(filePath: string): ArchiveKind | null {
@@ -100,14 +117,14 @@ function validateArchiveEntryPath(entryPath: string): void {
 }
 
 function stripArchivePath(entryPath: string, stripComponents: number): string | null {
-  const normalized = path.posix.normalize(normalizeArchivePath(entryPath));
-  if (!normalized || normalized === "." || normalized === "./") {
+  const raw = normalizeArchivePath(entryPath);
+  if (!raw || raw === "." || raw === "./") {
     return null;
   }
 
-  // Keep the validation separate so callers can reject traversal in the original
-  // path even if stripping could make it "look" safe.
-  const parts = normalized.split("/").filter((part) => part.length > 0 && part !== ".");
+  // Important: mimic tar --strip-components semantics (raw segments before
+  // normalization) so strip-induced escapes like "a/../b" are not hidden.
+  const parts = raw.split("/").filter((part) => part.length > 0 && part !== ".");
   const strip = Math.max(0, Math.floor(stripComponents));
   const stripped = strip === 0 ? parts.join("/") : parts.slice(strip).join("/");
   const result = path.posix.normalize(stripped);
@@ -126,16 +143,63 @@ function resolveCheckedOutPath(destDir: string, relPath: string, original: strin
   return outPath;
 }
 
+function clampLimit(value: number | undefined): number | undefined {
+  if (typeof value !== "number" || !Number.isFinite(value)) {
+    return undefined;
+  }
+  const v = Math.floor(value);
+  return v > 0 ? v : undefined;
+}
+
+function resolveExtractLimits(limits?: ArchiveExtractLimits): Required<ArchiveExtractLimits> {
+  // Defaults: defensive, but should not break normal installs.
+  return {
+    maxArchiveBytes: clampLimit(limits?.maxArchiveBytes) ?? 256 * 1024 * 1024,
+    maxEntries: clampLimit(limits?.maxEntries) ?? 50_000,
+    maxExtractedBytes: clampLimit(limits?.maxExtractedBytes) ?? 512 * 1024 * 1024,
+    maxEntryBytes: clampLimit(limits?.maxEntryBytes) ?? 256 * 1024 * 1024,
+  };
+}
+
+function createExtractBudgetTransform(params: {
+  onChunkBytes: (bytes: number) => void;
+}): Transform {
+  return new Transform({
+    transform(chunk, _encoding, callback) {
+      try {
+        const buf = chunk instanceof Buffer ? chunk : Buffer.from(chunk as Uint8Array);
+        params.onChunkBytes(buf.byteLength);
+        callback(null, buf);
+      } catch (err) {
+        callback(err instanceof Error ? err : new Error(String(err)));
+      }
+    },
+  });
+}
+
 async function extractZip(params: {
   archivePath: string;
   destDir: string;
   stripComponents?: number;
+  limits?: ArchiveExtractLimits;
 }): Promise<void> {
+  const limits = resolveExtractLimits(params.limits);
+  const stat = await fs.stat(params.archivePath);
+  if (stat.size > limits.maxArchiveBytes) {
+    throw new Error("archive size exceeds limit");
+  }
+
   const buffer = await fs.readFile(params.archivePath);
   const zip = await JSZip.loadAsync(buffer);
   const entries = Object.values(zip.files);
   const strip = Math.max(0, Math.floor(params.stripComponents ?? 0));
 
+  if (entries.length > limits.maxEntries) {
+    throw new Error("archive entry count exceeds limit");
+  }
+
+  let extractedBytes = 0;
+
   for (const entry of entries) {
     validateArchiveEntryPath(entry.name);
 
@@ -152,8 +216,38 @@ async function extractZip(params: {
     }
 
     await fs.mkdir(path.dirname(outPath), { recursive: true });
-    const data = await entry.async("nodebuffer");
-    await fs.writeFile(outPath, data);
+    let entryBytes = 0;
+    const onChunkBytes = (bytes: number) => {
+      entryBytes += bytes;
+      if (entryBytes > limits.maxEntryBytes) {
+        throw new Error("archive entry extracted size exceeds limit");
+      }
+      extractedBytes += bytes;
+      if (extractedBytes > limits.maxExtractedBytes) {
+        throw new Error("archive extracted size exceeds limit");
+      }
+    };
+
+    const readable =
+      typeof entry.nodeStream === "function"
+        ? (entry.nodeStream() as unknown)
+        : await entry.async("nodebuffer");
+
+    try {
+      if (readable instanceof Buffer) {
+        onChunkBytes(readable.byteLength);
+        await fs.writeFile(outPath, readable);
+      } else {
+        await pipeline(
+          readable as NodeJS.ReadableStream,
+          createExtractBudgetTransform({ onChunkBytes }),
+          createWriteStream(outPath),
+        );
+      }
+    } catch (err) {
+      await fs.unlink(outPath).catch(() => undefined);
+      throw err;
+    }
 
     // Best-effort permission restore for zip entries created on unix.
     if (typeof entry.unixPermissions === "number") {
@@ -172,6 +266,7 @@ export async function extractArchive(params: {
   kind?: ArchiveKind;
   stripComponents?: number;
   tarGzip?: boolean;
+  limits?: ArchiveExtractLimits;
   logger?: ArchiveLogger;
 }): Promise<void> {
   const kind = params.kind ?? resolveArchiveKind(params.archivePath);
@@ -182,7 +277,9 @@ export async function extractArchive(params: {
   const label = kind === "zip" ? "extract zip" : "extract tar";
   if (kind === "tar") {
     const strip = Math.max(0, Math.floor(params.stripComponents ?? 0));
-    let firstError: Error | undefined;
+    const limits = resolveExtractLimits(params.limits);
+    let entryCount = 0;
+    let extractedBytes = 0;
     await withTimeout(
       tar.x({
         file: params.archivePath,
@@ -191,48 +288,69 @@ export async function extractArchive(params: {
         gzip: params.tarGzip,
         preservePaths: false,
         strict: true,
-        filter: (entryPath, entry) => {
+        onReadEntry(entry) {
+          const archiveEntryPath =
+            typeof entry === "object" && entry !== null && "path" in entry
+              ? String((entry as { path: unknown }).path)
+              : "";
+          const archiveEntryType =
+            typeof entry === "object" && entry !== null && "type" in entry
+              ? String((entry as { type: unknown }).type)
+              : "";
+          const archiveEntrySize =
+            typeof entry === "object" &&
+            entry !== null &&
+            "size" in entry &&
+            typeof (entry as { size?: unknown }).size === "number" &&
+            Number.isFinite((entry as { size: number }).size)
+              ? Math.max(0, Math.floor((entry as { size: number }).size))
+              : 0;
+
           try {
-            validateArchiveEntryPath(entryPath);
-            // `tar`'s filter callback can pass either a ReadEntry or a Stats-ish object;
-            // fail closed for any link-like entries.
-            const entryType =
-              typeof entry === "object" &&
-              entry !== null &&
-              "type" in entry &&
-              typeof (entry as { type?: unknown }).type === "string"
-                ? (entry as { type: string }).type
-                : undefined;
-            const isSymlink =
-              typeof entry === "object" &&
-              entry !== null &&
-              "isSymbolicLink" in entry &&
-              typeof (entry as { isSymbolicLink?: unknown }).isSymbolicLink === "function" &&
-              Boolean((entry as { isSymbolicLink: () => boolean }).isSymbolicLink());
-            if (entryType === "SymbolicLink" || entryType === "Link" || isSymlink) {
-              throw new Error(`tar entry is a link: ${entryPath}`);
-            }
-            const relPath = stripArchivePath(entryPath, strip);
+            validateArchiveEntryPath(archiveEntryPath);
+
+            const relPath = stripArchivePath(archiveEntryPath, strip);
             if (!relPath) {
-              return false;
+              return;
             }
             validateArchiveEntryPath(relPath);
-            resolveCheckedOutPath(params.destDir, relPath, entryPath);
-            return true;
-          } catch (err) {
-            if (!firstError) {
-              firstError = err instanceof Error ? err : new Error(String(err));
+            resolveCheckedOutPath(params.destDir, relPath, archiveEntryPath);
+
+            if (
+              archiveEntryType === "SymbolicLink" ||
+              archiveEntryType === "Link" ||
+              archiveEntryType === "BlockDevice" ||
+              archiveEntryType === "CharacterDevice" ||
+              archiveEntryType === "FIFO" ||
+              archiveEntryType === "Socket"
+            ) {
+              throw new Error(`tar entry is a link: ${archiveEntryPath}`);
             }
-            return false;
+
+            entryCount += 1;
+            if (entryCount > limits.maxEntries) {
+              throw new Error("archive entry count exceeds limit");
+            }
+
+            if (archiveEntrySize > limits.maxEntryBytes) {
+              throw new Error("archive entry extracted size exceeds limit");
+            }
+            extractedBytes += archiveEntrySize;
+            if (extractedBytes > limits.maxExtractedBytes) {
+              throw new Error("archive extracted size exceeds limit");
+            }
+          } catch (err) {
+            const error = err instanceof Error ? err : new Error(String(err));
+            // Node's EventEmitter calls listeners with `this` bound to the
+            // emitter (tar.Unpack), which exposes Parser.abort().
+            const emitter = this as unknown as { abort?: (error: Error) => void };
+            emitter.abort?.(error);
           }
         },
       }),
       params.timeoutMs,
       label,
     );
-    if (firstError) {
-      throw firstError;
-    }
     return;
   }
 
@@ -241,6 +359,7 @@ export async function extractArchive(params: {
       archivePath: params.archivePath,
       destDir: params.destDir,
       stripComponents: params.stripComponents,
+      limits: params.limits,
     }),
     params.timeoutMs,
     label,