From d783ffb44d34d3895d2791615b556243602f89d2 Mon Sep 17 00:00:00 2001 From: Rodrigo Uroz Date: Tue, 24 Feb 2026 16:32:34 +0000 Subject: [PATCH] Compaction/Safeguard: add summary quality audit retries --- .../compaction-safeguard-runtime.ts | 2 + .../compaction-safeguard.test.ts | 74 +++++++++ .../pi-extensions/compaction-safeguard.ts | 156 +++++++++++++++--- 3 files changed, 212 insertions(+), 20 deletions(-) diff --git a/src/agents/pi-extensions/compaction-safeguard-runtime.ts b/src/agents/pi-extensions/compaction-safeguard-runtime.ts index b33bc82ffe9..988d08b5623 100644 --- a/src/agents/pi-extensions/compaction-safeguard-runtime.ts +++ b/src/agents/pi-extensions/compaction-safeguard-runtime.ts @@ -11,6 +11,8 @@ export type CompactionSafeguardRuntimeValue = { */ model?: Model; recentTurnsPreserve?: number; + qualityGuardEnabled?: boolean; + qualityGuardMaxRetries?: number; }; const registry = createSessionManagerRuntimeRegistry(); diff --git a/src/agents/pi-extensions/compaction-safeguard.test.ts b/src/agents/pi-extensions/compaction-safeguard.test.ts index 36c6bc753ae..673d543f61c 100644 --- a/src/agents/pi-extensions/compaction-safeguard.test.ts +++ b/src/agents/pi-extensions/compaction-safeguard.test.ts @@ -14,7 +14,10 @@ const { splitPreservedRecentTurns, formatPreservedTurnsSection, buildCompactionStructureInstructions, + extractOpaqueIdentifiers, + auditSummaryQuality, resolveRecentTurnsPreserve, + resolveQualityGuardMaxRetries, computeAdaptiveChunkRatio, isOversizedForSummary, BASE_CHUNK_RATIO, @@ -408,6 +411,77 @@ describe("compaction-safeguard recent-turn preservation", () => { expect(instructions).toContain("## Exact identifiers"); expect(instructions).toContain("Keep security caveats."); }); + + it("extracts opaque identifiers and audits summary quality", () => { + const identifiers = extractOpaqueIdentifiers( + "Track id a1b2c3d4e5f6 plus A1B2C3D4E5F6 and URL https://example.com/a and /tmp/x.log plus port host.local:18789", + ); + expect(identifiers.length).toBeGreaterThan(0); + expect(identifiers).toContain("A1B2C3D4E5F6"); + + const summary = [ + "## Decisions", + "Keep current flow.", + "## Open TODOs", + "None.", + "## Constraints/Rules", + "Preserve identifiers.", + "## Pending user asks", + "Explain post-compaction behavior.", + "## Exact identifiers", + identifiers.join(", "), + ].join("\n"); + + const quality = auditSummaryQuality({ + summary, + identifiers, + latestAsk: "Explain post-compaction behavior for memory indexing", + }); + expect(quality.ok).toBe(true); + }); + + it("dedupes identifiers before applying the result cap", () => { + const noisyPrefix = Array.from({ length: 10 }, () => "a0b0c0d0").join(" "); + const uniqueTail = Array.from( + { length: 12 }, + (_, idx) => `b${idx.toString(16).padStart(7, "0")}`, + ); + const identifiers = extractOpaqueIdentifiers(`${noisyPrefix} ${uniqueTail.join(" ")}`); + + expect(identifiers).toHaveLength(12); + expect(new Set(identifiers).size).toBe(12); + expect(identifiers).toContain("a0b0c0d0"); + expect(identifiers).toContain(uniqueTail[10]); + }); + + it("filters ordinary short numbers and trims wrapped punctuation", () => { + const identifiers = extractOpaqueIdentifiers( + "Year 2026 count 42 port 18789 ticket 123456 URL https://example.com/a, path /tmp/x.log.", + ); + + expect(identifiers).not.toContain("2026"); + expect(identifiers).not.toContain("42"); + expect(identifiers).not.toContain("18789"); + expect(identifiers).toContain("123456"); + expect(identifiers).toContain("https://example.com/a"); + expect(identifiers).toContain("/tmp/x.log"); + }); + + it("fails quality audit when required sections are missing", () => { + const quality = auditSummaryQuality({ + summary: "Short summary without structure", + identifiers: ["abc12345"], + latestAsk: "Need a status update", + }); + expect(quality.ok).toBe(false); + expect(quality.reasons.length).toBeGreaterThan(0); + }); + + it("clamps quality-guard retries into a safe range", () => { + expect(resolveQualityGuardMaxRetries(undefined)).toBe(1); + expect(resolveQualityGuardMaxRetries(-1)).toBe(0); + expect(resolveQualityGuardMaxRetries(99)).toBe(3); + }); }); describe("compaction-safeguard extension model fallback", () => { diff --git a/src/agents/pi-extensions/compaction-safeguard.ts b/src/agents/pi-extensions/compaction-safeguard.ts index a90df969068..4fd2e14e398 100644 --- a/src/agents/pi-extensions/compaction-safeguard.ts +++ b/src/agents/pi-extensions/compaction-safeguard.ts @@ -29,8 +29,11 @@ const TURN_PREFIX_INSTRUCTIONS = const MAX_TOOL_FAILURES = 8; const MAX_TOOL_FAILURE_CHARS = 240; const DEFAULT_RECENT_TURNS_PRESERVE = 3; +const DEFAULT_QUALITY_GUARD_MAX_RETRIES = 1; const MAX_RECENT_TURNS_PRESERVE = 12; +const MAX_QUALITY_GUARD_MAX_RETRIES = 3; const MAX_RECENT_TURN_TEXT_CHARS = 600; +const MAX_EXTRACTED_IDENTIFIERS = 12; const REQUIRED_SUMMARY_SECTIONS = [ "## Decisions", "## Open TODOs", @@ -58,6 +61,13 @@ function resolveRecentTurnsPreserve(value: unknown): number { ); } +function resolveQualityGuardMaxRetries(value: unknown): number { + return Math.min( + MAX_QUALITY_GUARD_MAX_RETRIES, + clampNonNegativeInt(value, DEFAULT_QUALITY_GUARD_MAX_RETRIES), + ); +} + function normalizeFailureText(text: string): string { return text.replace(/\s+/g, " ").trim(); } @@ -275,6 +285,78 @@ function buildCompactionStructureInstructions(customInstructions?: string): stri return `${sectionsTemplate}\n\nAdditional focus:\n${custom}`; } +function sanitizeExtractedIdentifier(value: string): string { + return value + .trim() + .replace(/^[("'`[{<]+/, "") + .replace(/[)\]"'`,;:.!?<>]+$/, ""); +} + +function extractOpaqueIdentifiers(text: string): string[] { + const matches = + text.match( + /([A-Fa-f0-9]{8,}|https?:\/\/\S+|\/[\w./-]+|[A-Za-z]:\\[\w\\.-]+|[A-Za-z0-9._-]+\.[A-Za-z0-9._/-]+:\d{1,5}|\b\d{6,}\b)/g, + ) ?? []; + return Array.from( + new Set( + matches + .map((value) => sanitizeExtractedIdentifier(value)) + .filter((value) => value.length >= 4), + ), + ).slice(0, MAX_EXTRACTED_IDENTIFIERS); +} + +function extractLatestUserAsk(messages: AgentMessage[]): string | null { + for (let i = messages.length - 1; i >= 0; i -= 1) { + const msg = messages[i]; + if (msg.role !== "user") { + continue; + } + const text = extractMessageText(msg); + if (text) { + return text; + } + } + return null; +} + +function hasAskOverlap(summary: string, latestAsk: string | null): boolean { + if (!latestAsk) { + return true; + } + const normalizedSummary = summary.toLowerCase(); + const tokens = latestAsk + .toLowerCase() + .split(/[^a-z0-9]+/g) + .filter((token) => token.length >= 5) + .slice(0, 8); + if (tokens.length === 0) { + return true; + } + return tokens.some((token) => normalizedSummary.includes(token)); +} + +function auditSummaryQuality(params: { + summary: string; + identifiers: string[]; + latestAsk: string | null; +}): { ok: boolean; reasons: string[] } { + const reasons: string[] = []; + for (const section of REQUIRED_SUMMARY_SECTIONS) { + if (!params.summary.includes(section)) { + reasons.push(`missing_section:${section}`); + } + } + const missingIdentifiers = params.identifiers.filter((id) => !params.summary.includes(id)); + if (missingIdentifiers.length > 0) { + reasons.push(`missing_identifiers:${missingIdentifiers.slice(0, 3).join(",")}`); + } + if (!hasAskOverlap(params.summary, params.latestAsk)) { + reasons.push("latest_user_ask_not_reflected"); + } + return { ok: reasons.length === 0, reasons }; +} + /** * Read and format critical workspace context for compaction summary. * Extracts "Session Startup" and "Red Lines" from AGENTS.md. @@ -358,6 +440,8 @@ export default function compactionSafeguardExtension(api: ExtensionAPI): void { const turnPrefixMessages = preparation.turnPrefixMessages ?? []; let messagesToSummarize = preparation.messagesToSummarize; const recentTurnsPreserve = resolveRecentTurnsPreserve(runtime?.recentTurnsPreserve); + const qualityGuardEnabled = runtime?.qualityGuardEnabled ?? true; + const qualityGuardMaxRetries = resolveQualityGuardMaxRetries(runtime?.qualityGuardMaxRetries); const maxHistoryShare = runtime?.maxHistoryShare ?? 0.5; @@ -436,6 +520,17 @@ export default function compactionSafeguardExtension(api: ExtensionAPI): void { }); messagesToSummarize = summaryTargetMessages; const preservedTurnsSection = formatPreservedTurnsSection(preservedRecentMessages); + const latestUserAsk = extractLatestUserAsk([ + ...messagesToSummarize, + ...preservedRecentMessages, + ...turnPrefixMessages, + ]); + const identifierSeedText = [...messagesToSummarize, ...preservedRecentMessages] + .slice(-10) + .map((message) => extractMessageText(message)) + .filter(Boolean) + .join("\n"); + const identifiers = extractOpaqueIdentifiers(identifierSeedText); const structuredInstructions = buildCompactionStructureInstructions(customInstructions); // Use adaptive chunk ratio based on message sizes, reserving headroom for @@ -453,34 +548,52 @@ export default function compactionSafeguardExtension(api: ExtensionAPI): void { // incorporates context from pruned messages instead of losing it entirely. const effectivePreviousSummary = droppedSummary ?? preparation.previousSummary; - const historySummary = await summarizeInStages({ - messages: messagesToSummarize, - model, - apiKey, - signal, - reserveTokens, - maxChunkTokens, - contextWindow: contextWindowTokens, - customInstructions: structuredInstructions, - previousSummary: effectivePreviousSummary, - }); - - let summary = historySummary; - if (preparation.isSplitTurn && turnPrefixMessages.length > 0) { - const prefixSummary = await summarizeInStages({ - messages: turnPrefixMessages, + let summary = ""; + let currentInstructions = structuredInstructions; + const totalAttempts = qualityGuardEnabled ? qualityGuardMaxRetries + 1 : 1; + for (let attempt = 0; attempt < totalAttempts; attempt += 1) { + const historySummary = await summarizeInStages({ + messages: messagesToSummarize, model, apiKey, signal, reserveTokens, maxChunkTokens, contextWindow: contextWindowTokens, - customInstructions: `${TURN_PREFIX_INSTRUCTIONS}\n\n${structuredInstructions}`, - previousSummary: undefined, + customInstructions: currentInstructions, + previousSummary: effectivePreviousSummary, }); - summary = `${historySummary}\n\n---\n\n**Turn Context (split turn):**\n\n${prefixSummary}`; + + summary = historySummary; + if (preparation.isSplitTurn && turnPrefixMessages.length > 0) { + const prefixSummary = await summarizeInStages({ + messages: turnPrefixMessages, + model, + apiKey, + signal, + reserveTokens, + maxChunkTokens, + contextWindow: contextWindowTokens, + customInstructions: `${TURN_PREFIX_INSTRUCTIONS}\n\n${currentInstructions}`, + previousSummary: undefined, + }); + summary = `${historySummary}\n\n---\n\n**Turn Context (split turn):**\n\n${prefixSummary}`; + } + summary += preservedTurnsSection; + if (!qualityGuardEnabled) { + break; + } + const quality = auditSummaryQuality({ + summary, + identifiers, + latestAsk: latestUserAsk, + }); + if (quality.ok || attempt >= totalAttempts - 1) { + break; + } + const reasons = quality.reasons.join(", "); + currentInstructions = `${structuredInstructions}\n\nPrevious summary failed quality checks (${reasons}). Fix all issues and include every required section with exact identifiers preserved.`; } - summary += preservedTurnsSection; summary += toolFailureSection; summary += fileOpsSummary; @@ -516,7 +629,10 @@ export const __testing = { splitPreservedRecentTurns, formatPreservedTurnsSection, buildCompactionStructureInstructions, + extractOpaqueIdentifiers, + auditSummaryQuality, resolveRecentTurnsPreserve, + resolveQualityGuardMaxRetries, computeAdaptiveChunkRatio, isOversizedForSummary, BASE_CHUNK_RATIO,