Compaction: preserve opaque identifiers in summaries (openclaw#25553) thanks @rodrigouroz

Verified: - pnpm install --frozen-lockfile - pnpm build - pnpm check - pnpm test:macmini Co-authored-by: rodrigouroz <384037+rodrigouroz@users.noreply.github.com> Co-authored-by: Tak Hoffman <781889+Takhoffman@users.noreply.github.com>
2026-05-10 13:44:58 +00:00 · 2026-02-27 11:14:05 -03:00
parent 84a88b2ace
commit 0fe6cf06b2
15 changed files with 344 additions and 4 deletions
--- a/src/agents/compaction.identifier-policy.test.ts
+++ b/src/agents/compaction.identifier-policy.test.ts
@@ -0,0 +1,117 @@
+import type { AgentMessage } from "@mariozechner/pi-agent-core";
+import type { ExtensionContext } from "@mariozechner/pi-coding-agent";
+import * as piCodingAgent from "@mariozechner/pi-coding-agent";
+import { beforeEach, describe, expect, it, vi } from "vitest";
+import { buildCompactionSummarizationInstructions, summarizeInStages } from "./compaction.js";
+
+vi.mock("@mariozechner/pi-coding-agent", async (importOriginal) => {
+  const actual = await importOriginal<typeof piCodingAgent>();
+  return {
+    ...actual,
+    generateSummary: vi.fn(),
+  };
+});
+
+const mockGenerateSummary = vi.mocked(piCodingAgent.generateSummary);
+
+function makeMessage(index: number, size = 1200): AgentMessage {
+  return {
+    role: "user",
+    content: `m${index}-${"x".repeat(size)}`,
+    timestamp: index,
+  };
+}
+
+describe("compaction identifier policy", () => {
+  const testModel = {
+    provider: "anthropic",
+    model: "claude-3-opus",
+    contextWindow: 200_000,
+  } as unknown as NonNullable<ExtensionContext["model"]>;
+
+  beforeEach(() => {
+    mockGenerateSummary.mockReset();
+    mockGenerateSummary.mockResolvedValue("summary");
+  });
+
+  it("defaults to strict identifier preservation", async () => {
+    await summarizeInStages({
+      messages: [makeMessage(1), makeMessage(2)],
+      model: testModel,
+      apiKey: "test-key",
+      signal: new AbortController().signal,
+      reserveTokens: 4000,
+      maxChunkTokens: 8000,
+      contextWindow: 200_000,
+    });
+
+    const firstCall = mockGenerateSummary.mock.calls[0];
+    expect(firstCall?.[5]).toContain("Preserve all opaque identifiers exactly as written");
+    expect(firstCall?.[5]).toContain("UUIDs");
+  });
+
+  it("can disable identifier preservation with off policy", async () => {
+    await summarizeInStages({
+      messages: [makeMessage(1), makeMessage(2)],
+      model: testModel,
+      apiKey: "test-key",
+      signal: new AbortController().signal,
+      reserveTokens: 4000,
+      maxChunkTokens: 8000,
+      contextWindow: 200_000,
+      summarizationInstructions: { identifierPolicy: "off" },
+    });
+
+    const firstCall = mockGenerateSummary.mock.calls[0];
+    expect(firstCall?.[5]).toBeUndefined();
+  });
+
+  it("supports custom identifier instructions", async () => {
+    await summarizeInStages({
+      messages: [makeMessage(1), makeMessage(2)],
+      model: testModel,
+      apiKey: "test-key",
+      signal: new AbortController().signal,
+      reserveTokens: 4000,
+      maxChunkTokens: 8000,
+      contextWindow: 200_000,
+      summarizationInstructions: {
+        identifierPolicy: "custom",
+        identifierInstructions: "Keep ticket IDs unchanged.",
+      },
+    });
+
+    const firstCall = mockGenerateSummary.mock.calls[0];
+    expect(firstCall?.[5]).toContain("Keep ticket IDs unchanged.");
+    expect(firstCall?.[5]).not.toContain("Preserve all opaque identifiers exactly as written");
+  });
+
+  it("falls back to strict text when custom policy is missing instructions", () => {
+    const built = buildCompactionSummarizationInstructions(undefined, {
+      identifierPolicy: "custom",
+      identifierInstructions: "   ",
+    });
+    expect(built).toContain("Preserve all opaque identifiers exactly as written");
+  });
+
+  it("avoids duplicate additional-focus headers in split+merge path", async () => {
+    await summarizeInStages({
+      messages: [makeMessage(1), makeMessage(2), makeMessage(3), makeMessage(4)],
+      model: testModel,
+      apiKey: "test-key",
+      signal: new AbortController().signal,
+      reserveTokens: 4000,
+      maxChunkTokens: 1000,
+      contextWindow: 200_000,
+      parts: 2,
+      minMessagesForSplit: 4,
+      customInstructions: "Prioritize customer-visible regressions.",
+    });
+
+    const mergedCall = mockGenerateSummary.mock.calls.at(-1);
+    const instructions = mergedCall?.[5] ?? "";
+    expect(instructions).toContain("Merge these partial summaries into a single cohesive summary.");
+    expect(instructions).toContain("Prioritize customer-visible regressions.");
+    expect((instructions.match(/Additional focus:/g) ?? []).length).toBe(1);
+  });
+});
--- a/src/agents/compaction.identifier-preservation.test.ts
+++ b/src/agents/compaction.identifier-preservation.test.ts
@@ -0,0 +1,128 @@
+import type { AgentMessage } from "@mariozechner/pi-agent-core";
+import type { ExtensionContext } from "@mariozechner/pi-coding-agent";
+import * as piCodingAgent from "@mariozechner/pi-coding-agent";
+import { beforeEach, describe, expect, it, vi } from "vitest";
+import { buildCompactionSummarizationInstructions, summarizeInStages } from "./compaction.js";
+
+vi.mock("@mariozechner/pi-coding-agent", async (importOriginal) => {
+  const actual = await importOriginal<typeof piCodingAgent>();
+  return {
+    ...actual,
+    generateSummary: vi.fn(),
+  };
+});
+
+const mockGenerateSummary = vi.mocked(piCodingAgent.generateSummary);
+
+function makeMessage(index: number, size = 1200): AgentMessage {
+  return {
+    role: "user",
+    content: `m${index}-${"x".repeat(size)}`,
+    timestamp: index,
+  };
+}
+
+describe("compaction identifier-preservation instructions", () => {
+  const testModel = {
+    provider: "anthropic",
+    model: "claude-3-opus",
+    contextWindow: 200_000,
+  } as unknown as NonNullable<ExtensionContext["model"]>;
+
+  beforeEach(() => {
+    mockGenerateSummary.mockReset();
+    mockGenerateSummary.mockResolvedValue("summary");
+  });
+
+  it("injects identifier-preservation guidance even without custom instructions", async () => {
+    await summarizeInStages({
+      messages: [makeMessage(1), makeMessage(2)],
+      model: testModel,
+      apiKey: "test-key",
+      signal: new AbortController().signal,
+      reserveTokens: 4000,
+      maxChunkTokens: 8000,
+      contextWindow: 200_000,
+    });
+
+    expect(mockGenerateSummary).toHaveBeenCalled();
+    const firstCall = mockGenerateSummary.mock.calls[0];
+    expect(firstCall?.[5]).toContain("Preserve all opaque identifiers exactly as written");
+    expect(firstCall?.[5]).toContain("UUIDs");
+    expect(firstCall?.[5]).toContain("IPs");
+    expect(firstCall?.[5]).toContain("ports");
+  });
+
+  it("keeps identifier-preservation guidance when custom instructions are provided", async () => {
+    await summarizeInStages({
+      messages: [makeMessage(1), makeMessage(2)],
+      model: testModel,
+      apiKey: "test-key",
+      signal: new AbortController().signal,
+      reserveTokens: 4000,
+      maxChunkTokens: 8000,
+      contextWindow: 200_000,
+      customInstructions: "Focus on release-impacting bugs.",
+    });
+
+    const firstCall = mockGenerateSummary.mock.calls[0];
+    expect(firstCall?.[5]).toContain("Preserve all opaque identifiers exactly as written");
+    expect(firstCall?.[5]).toContain("Additional focus:");
+    expect(firstCall?.[5]).toContain("Focus on release-impacting bugs.");
+  });
+
+  it("applies identifier-preservation guidance on staged split + merge summarization", async () => {
+    await summarizeInStages({
+      messages: [makeMessage(1), makeMessage(2), makeMessage(3), makeMessage(4)],
+      model: testModel,
+      apiKey: "test-key",
+      signal: new AbortController().signal,
+      reserveTokens: 4000,
+      maxChunkTokens: 1000,
+      contextWindow: 200_000,
+      parts: 2,
+      minMessagesForSplit: 4,
+    });
+
+    expect(mockGenerateSummary.mock.calls.length).toBeGreaterThan(1);
+    for (const call of mockGenerateSummary.mock.calls) {
+      expect(call[5]).toContain("Preserve all opaque identifiers exactly as written");
+    }
+  });
+
+  it("avoids duplicate additional-focus headers in split+merge path", async () => {
+    await summarizeInStages({
+      messages: [makeMessage(1), makeMessage(2), makeMessage(3), makeMessage(4)],
+      model: testModel,
+      apiKey: "test-key",
+      signal: new AbortController().signal,
+      reserveTokens: 4000,
+      maxChunkTokens: 1000,
+      contextWindow: 200_000,
+      parts: 2,
+      minMessagesForSplit: 4,
+      customInstructions: "Prioritize customer-visible regressions.",
+    });
+
+    const mergedCall = mockGenerateSummary.mock.calls.at(-1);
+    const instructions = mergedCall?.[5] ?? "";
+    expect(instructions).toContain("Merge these partial summaries into a single cohesive summary.");
+    expect(instructions).toContain("Prioritize customer-visible regressions.");
+    expect((instructions.match(/Additional focus:/g) ?? []).length).toBe(1);
+  });
+});
+
+describe("buildCompactionSummarizationInstructions", () => {
+  it("returns base instructions when no custom text is provided", () => {
+    const result = buildCompactionSummarizationInstructions();
+    expect(result).toContain("Preserve all opaque identifiers exactly as written");
+    expect(result).not.toContain("Additional focus:");
+  });
+
+  it("appends custom instructions in a stable format", () => {
+    const result = buildCompactionSummarizationInstructions("Keep deployment details.");
+    expect(result).toContain("Preserve all opaque identifiers exactly as written");
+    expect(result).toContain("Additional focus:");
+    expect(result).toContain("Keep deployment details.");
+  });
+});
--- a/src/agents/compaction.ts
+++ b/src/agents/compaction.ts
@@ -1,6 +1,7 @@
 import type { AgentMessage } from "@mariozechner/pi-agent-core";
 import type { ExtensionContext } from "@mariozechner/pi-coding-agent";
 import { estimateTokens, generateSummary } from "@mariozechner/pi-coding-agent";
+import type { AgentCompactionIdentifierPolicy } from "../config/types.agent-defaults.js";
 import { retryAsync } from "../infra/retry.js";
 import { createSubsystemLogger } from "../logging/subsystem.js";
 import { DEFAULT_CONTEXT_TOKENS } from "./defaults.js";
@@ -16,6 +17,46 @@ const DEFAULT_PARTS = 2;
 const MERGE_SUMMARIES_INSTRUCTIONS =
  "Merge these partial summaries into a single cohesive summary. Preserve decisions," +
  " TODOs, open questions, and any constraints.";
+const IDENTIFIER_PRESERVATION_INSTRUCTIONS =
+  "Preserve all opaque identifiers exactly as written (no shortening or reconstruction), " +
+  "including UUIDs, hashes, IDs, tokens, API keys, hostnames, IPs, ports, URLs, and file names.";
+
+export type CompactionSummarizationInstructions = {
+  identifierPolicy?: AgentCompactionIdentifierPolicy;
+  identifierInstructions?: string;
+};
+
+function resolveIdentifierPreservationInstructions(
+  instructions?: CompactionSummarizationInstructions,
+): string | undefined {
+  const policy = instructions?.identifierPolicy ?? "strict";
+  if (policy === "off") {
+    return undefined;
+  }
+  if (policy === "custom") {
+    const custom = instructions?.identifierInstructions?.trim();
+    return custom && custom.length > 0 ? custom : IDENTIFIER_PRESERVATION_INSTRUCTIONS;
+  }
+  return IDENTIFIER_PRESERVATION_INSTRUCTIONS;
+}
+
+export function buildCompactionSummarizationInstructions(
+  customInstructions?: string,
+  instructions?: CompactionSummarizationInstructions,
+): string | undefined {
+  const custom = customInstructions?.trim();
+  const identifierPreservation = resolveIdentifierPreservationInstructions(instructions);
+  if (!identifierPreservation && !custom) {
+    return undefined;
+  }
+  if (!custom) {
+    return identifierPreservation;
+  }
+  if (!identifierPreservation) {
+    return `Additional focus:\n${custom}`;
+  }
+  return `${identifierPreservation}\n\nAdditional focus:\n${custom}`;
+}

 export function estimateMessagesTokens(messages: AgentMessage[]): number {
  // SECURITY: toolResult.details can contain untrusted/verbose payloads; never include in LLM-facing compaction.
@@ -164,6 +205,7 @@ async function summarizeChunks(params: {
  reserveTokens: number;
  maxChunkTokens: number;
  customInstructions?: string;
+  summarizationInstructions?: CompactionSummarizationInstructions;
  previousSummary?: string;
 }): Promise<string> {
  if (params.messages.length === 0) {
@@ -174,7 +216,10 @@ async function summarizeChunks(params: {
  const safeMessages = stripToolResultDetails(params.messages);
  const chunks = chunkMessagesByMaxTokens(safeMessages, params.maxChunkTokens);
  let summary = params.previousSummary;
-
+  const effectiveInstructions = buildCompactionSummarizationInstructions(
+    params.customInstructions,
+    params.summarizationInstructions,
+  );
  for (const chunk of chunks) {
    summary = await retryAsync(
      () =>
@@ -184,7 +229,7 @@ async function summarizeChunks(params: {
          params.reserveTokens,
          params.apiKey,
          params.signal,
-          params.customInstructions,
+          effectiveInstructions,
          summary,
        ),
      {
@@ -214,6 +259,7 @@ export async function summarizeWithFallback(params: {
  maxChunkTokens: number;
  contextWindow: number;
  customInstructions?: string;
+  summarizationInstructions?: CompactionSummarizationInstructions;
  previousSummary?: string;
 }): Promise<string> {
  const { messages, contextWindow } = params;
@@ -282,6 +328,7 @@ export async function summarizeInStages(params: {
  maxChunkTokens: number;
  contextWindow: number;
  customInstructions?: string;
+  summarizationInstructions?: CompactionSummarizationInstructions;
  previousSummary?: string;
  parts?: number;
  minMessagesForSplit?: number;
@@ -325,8 +372,9 @@ export async function summarizeInStages(params: {
    timestamp: Date.now(),
  }));

-  const mergeInstructions = params.customInstructions
-    ? `${MERGE_SUMMARIES_INSTRUCTIONS}\n\nAdditional focus:\n${params.customInstructions}`
+  const custom = params.customInstructions?.trim();
+  const mergeInstructions = custom
+    ? `${MERGE_SUMMARIES_INSTRUCTIONS}\n\n${custom}`
    : MERGE_SUMMARIES_INSTRUCTIONS;

  return summarizeWithFallback({
--- a/src/agents/pi-embedded-runner/extensions.ts
+++ b/src/agents/pi-embedded-runner/extensions.ts
@@ -81,6 +81,8 @@ export function buildEmbeddedExtensionFactories(params: {
    setCompactionSafeguardRuntime(params.sessionManager, {
      maxHistoryShare: compactionCfg?.maxHistoryShare,
      contextWindowTokens: contextWindowInfo.tokens,
+      identifierPolicy: compactionCfg?.identifierPolicy,
+      identifierInstructions: compactionCfg?.identifierInstructions,
      model: params.model,
    });
    factories.push(compactionSafeguardExtension);
--- a/src/agents/pi-extensions/compaction-safeguard-runtime.ts
+++ b/src/agents/pi-extensions/compaction-safeguard-runtime.ts
@@ -1,9 +1,12 @@
 import type { Api, Model } from "@mariozechner/pi-ai";
+import type { AgentCompactionIdentifierPolicy } from "../../config/types.agent-defaults.js";
 import { createSessionManagerRuntimeRegistry } from "./session-manager-runtime-registry.js";

 export type CompactionSafeguardRuntimeValue = {
  maxHistoryShare?: number;
  contextWindowTokens?: number;
+  identifierPolicy?: AgentCompactionIdentifierPolicy;
+  identifierInstructions?: string;
  /**
   * Model to use for compaction summarization.
   * Passed through runtime because `ctx.model` is undefined in the compact.ts workflow
--- a/src/agents/pi-extensions/compaction-safeguard.ts
+++ b/src/agents/pi-extensions/compaction-safeguard.ts
@@ -212,6 +212,10 @@ export default function compactionSafeguardExtension(api: ExtensionAPI): void {
    // Model resolution: ctx.model is undefined in compact.ts workflow (extensionRunner.initialize() is never called).
    // Fall back to runtime.model which is explicitly passed when building extension paths.
    const runtime = getCompactionSafeguardRuntime(ctx.sessionManager);
+    const summarizationInstructions = {
+      identifierPolicy: runtime?.identifierPolicy,
+      identifierInstructions: runtime?.identifierInstructions,
+    };
    const model = ctx.model ?? runtime?.model;
    if (!model) {
      // Log warning once per session when both models are missing (diagnostic for future issues).
@@ -295,6 +299,7 @@ export default function compactionSafeguardExtension(api: ExtensionAPI): void {
                  maxChunkTokens: droppedMaxChunkTokens,
                  contextWindow: contextWindowTokens,
                  customInstructions,
+                  summarizationInstructions,
                  previousSummary: preparation.previousSummary,
                });
              } catch (droppedError) {
@@ -333,6 +338,7 @@ export default function compactionSafeguardExtension(api: ExtensionAPI): void {
        maxChunkTokens,
        contextWindow: contextWindowTokens,
        customInstructions,
+        summarizationInstructions,
        previousSummary: effectivePreviousSummary,
      });

@@ -347,6 +353,7 @@ export default function compactionSafeguardExtension(api: ExtensionAPI): void {
          maxChunkTokens,
          contextWindow: contextWindowTokens,
          customInstructions: TURN_PREFIX_INSTRUCTIONS,
+          summarizationInstructions,
          previousSummary: undefined,
        });
        summary = `${historySummary}\n\n---\n\n**Turn Context (split turn):**\n\n${prefixSummary}`;