test(security): add overflow compaction truncation-budget regression

2026-04-19 09:58:38 +00:00 · 2026-02-21 12:58:44 +01:00
parent 084f621025
commit b577228d6b
2 changed files with 71 additions and 1 deletions
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -29,6 +29,7 @@ Docs: https://docs.openclaw.ai
 ### Fixes
 - Security/Agents: keep overflow compaction retry budgeting global across tool-result truncation recovery so successful truncation cannot reset the overflow retry counter and amplify retry/cost cycles. This ships in the next npm release. Thanks @aether-ai-agent for reporting.
 - BlueBubbles/Security (optional beta iMessage plugin): require webhook token authentication for all BlueBubbles webhook requests (including loopback/proxied setups), removing passwordless webhook fallback behavior. Thanks @zpbrent.
 - iOS/Security: force `https://` for non-loopback manual gateway hosts during iOS onboarding to block insecure remote transport URLs. (#21969) Thanks @mbelinky.
 - Gateway/Security: remove shared-IP fallback for canvas endpoints and require token or session capability for canvas access. Thanks @thewilloftheshadow.
--- a/src/agents/pi-embedded-runner/run.overflow-compaction.test.ts
+++ b/src/agents/pi-embedded-runner/run.overflow-compaction.test.ts
@@ -2,11 +2,20 @@ import "./run.overflow-compaction.mocks.shared.js";
 import { beforeEach, describe, expect, it, vi } from "vitest";
 import { compactEmbeddedPiSessionDirect } from "./compact.js";
 import { runEmbeddedPiAgent } from "./run.js";
-import { mockOverflowRetrySuccess } from "./run.overflow-compaction.fixture.js";
+import { makeAttemptResult, mockOverflowRetrySuccess } from "./run.overflow-compaction.fixture.js";
 import { runEmbeddedAttempt } from "./run/attempt.js";
 import type { EmbeddedRunAttemptResult } from "./run/types.js";
 import {
  sessionLikelyHasOversizedToolResults,
  truncateOversizedToolResultsInSession,
 } from "./tool-result-truncation.js";
 const mockedRunEmbeddedAttempt = vi.mocked(runEmbeddedAttempt);
 const mockedCompactDirect = vi.mocked(compactEmbeddedPiSessionDirect);
 const mockedSessionLikelyHasOversizedToolResults = vi.mocked(sessionLikelyHasOversizedToolResults);
 const mockedTruncateOversizedToolResultsInSession = vi.mocked(
  truncateOversizedToolResultsInSession,
 );
 describe("runEmbeddedPiAgent overflow compaction trigger routing", () => {
  beforeEach(() => {
@@ -37,4 +46,64 @@ describe("runEmbeddedPiAgent overflow compaction trigger routing", () => {
      }),
    );
  });
  it("does not reset compaction attempt budget after successful tool-result truncation", async () => {
    const overflowError = new Error("request_too_large: Request size exceeds model context window");
    mockedRunEmbeddedAttempt
      .mockResolvedValueOnce(
        makeAttemptResult({
          promptError: overflowError,
          messagesSnapshot: [
            {
              role: "assistant",
              content: "big tool output",
            } as unknown as EmbeddedRunAttemptResult["messagesSnapshot"][number],
          ],
        }),
      )
      .mockResolvedValueOnce(makeAttemptResult({ promptError: overflowError }))
      .mockResolvedValueOnce(makeAttemptResult({ promptError: overflowError }))
      .mockResolvedValueOnce(makeAttemptResult({ promptError: overflowError }))
      // Keep one extra mocked response so legacy reset behavior does not crash the test.
      .mockResolvedValueOnce(makeAttemptResult({ promptError: overflowError }));
    mockedCompactDirect
      .mockResolvedValueOnce({
        ok: false,
        compacted: false,
        reason: "nothing to compact",
      })
      .mockResolvedValueOnce({
        ok: true,
        compacted: true,
        result: { summary: "Compacted 2", firstKeptEntryId: "entry-5", tokensBefore: 160000 },
      })
      .mockResolvedValueOnce({
        ok: true,
        compacted: true,
        result: { summary: "Compacted 3", firstKeptEntryId: "entry-7", tokensBefore: 140000 },
      });
    mockedSessionLikelyHasOversizedToolResults.mockReturnValue(true);
    mockedTruncateOversizedToolResultsInSession.mockResolvedValueOnce({
      truncated: true,
      truncatedCount: 1,
    });
    const result = await runEmbeddedPiAgent({
      sessionId: "test-session",
      sessionKey: "test-key",
      sessionFile: "/tmp/session.json",
      workspaceDir: "/tmp/workspace",
      prompt: "hello",
      timeoutMs: 30000,
      runId: "run-1",
    });
    expect(mockedCompactDirect).toHaveBeenCalledTimes(3);
    expect(mockedTruncateOversizedToolResultsInSession).toHaveBeenCalledTimes(1);
    expect(mockedRunEmbeddedAttempt).toHaveBeenCalledTimes(4);
    expect(result.meta.error?.kind).toBe("context_overflow");
  });
 });