Compaction/Safeguard: add summary quality audit retries

This commit is contained in:
Rodrigo Uroz
2026-02-24 16:32:34 +00:00
parent 806d38b43a
commit d783ffb44d
3 changed files with 212 additions and 20 deletions

View File

@@ -11,6 +11,8 @@ export type CompactionSafeguardRuntimeValue = {
*/
model?: Model<Api>;
recentTurnsPreserve?: number;
qualityGuardEnabled?: boolean;
qualityGuardMaxRetries?: number;
};
const registry = createSessionManagerRuntimeRegistry<CompactionSafeguardRuntimeValue>();

View File

@@ -14,7 +14,10 @@ const {
splitPreservedRecentTurns,
formatPreservedTurnsSection,
buildCompactionStructureInstructions,
extractOpaqueIdentifiers,
auditSummaryQuality,
resolveRecentTurnsPreserve,
resolveQualityGuardMaxRetries,
computeAdaptiveChunkRatio,
isOversizedForSummary,
BASE_CHUNK_RATIO,
@@ -408,6 +411,77 @@ describe("compaction-safeguard recent-turn preservation", () => {
expect(instructions).toContain("## Exact identifiers");
expect(instructions).toContain("Keep security caveats.");
});
// Happy path: identifiers pulled from a mixed text (hex ids, URL, path, host:port)
// are all echoed into a fully structured summary, so the audit must pass.
it("extracts opaque identifiers and audits summary quality", () => {
  const sourceText =
    "Track id a1b2c3d4e5f6 plus A1B2C3D4E5F6 and URL https://example.com/a and /tmp/x.log plus port host.local:18789";
  const identifiers = extractOpaqueIdentifiers(sourceText);
  expect(identifiers.length).toBeGreaterThan(0);
  expect(identifiers).toContain("A1B2C3D4E5F6");

  // Every required heading is present and the identifier list is embedded verbatim.
  const summaryLines = [
    "## Decisions",
    "Keep current flow.",
    "## Open TODOs",
    "None.",
    "## Constraints/Rules",
    "Preserve identifiers.",
    "## Pending user asks",
    "Explain post-compaction behavior.",
    "## Exact identifiers",
    identifiers.join(", "),
  ];
  const { ok } = auditSummaryQuality({
    summary: summaryLines.join("\n"),
    identifiers,
    latestAsk: "Explain post-compaction behavior for memory indexing",
  });
  expect(ok).toBe(true);
});
// Ten copies of one id followed by twelve distinct ids: if the cap were applied
// before deduplication, most of the distinct ids would be cut off.
it("dedupes identifiers before applying the result cap", () => {
  const repeatedId = new Array<string>(10).fill("a0b0c0d0").join(" ");
  const uniqueTail: string[] = [];
  for (let idx = 0; idx < 12; idx += 1) {
    uniqueTail.push(`b${idx.toString(16).padStart(7, "0")}`);
  }
  const identifiers = extractOpaqueIdentifiers([repeatedId, ...uniqueTail].join(" "));

  expect(identifiers).toHaveLength(12);
  expect(new Set(identifiers).size).toBe(12);
  expect(identifiers).toContain("a0b0c0d0");
  // The repeated id takes one slot, so tail entries 0..10 fit under the cap of 12.
  expect(identifiers).toContain(uniqueTail[10]);
});
// Bare numbers shorter than six digits are not identifiers; trailing "," and "."
// around a URL or path must be trimmed off the extracted value.
it("filters ordinary short numbers and trims wrapped punctuation", () => {
  const identifiers = extractOpaqueIdentifiers(
    "Year 2026 count 42 port 18789 ticket 123456 URL https://example.com/a, path /tmp/x.log.",
  );

  for (const ordinaryNumber of ["2026", "42", "18789"]) {
    expect(identifiers).not.toContain(ordinaryNumber);
  }
  for (const expectedId of ["123456", "https://example.com/a", "/tmp/x.log"]) {
    expect(identifiers).toContain(expectedId);
  }
});
// An unstructured one-line summary must be rejected with at least one reason.
it("fails quality audit when required sections are missing", () => {
  const { ok, reasons } = auditSummaryQuality({
    summary: "Short summary without structure",
    identifiers: ["abc12345"],
    latestAsk: "Need a status update",
  });

  expect(ok).toBe(false);
  expect(reasons.length).toBeGreaterThan(0);
});
// Table of [configured value, resolved retries]: missing -> default, negative -> 0,
// oversized -> upper bound.
it("clamps quality-guard retries into a safe range", () => {
  const cases: Array<[unknown, number]> = [
    [undefined, 1],
    [-1, 0],
    [99, 3],
  ];
  for (const [configured, expected] of cases) {
    expect(resolveQualityGuardMaxRetries(configured)).toBe(expected);
  }
});
});
describe("compaction-safeguard extension model fallback", () => {

View File

@@ -29,8 +29,11 @@ const TURN_PREFIX_INSTRUCTIONS =
// NOTE(review): the tool-failure and recent-turn limits below are consumed outside
// this view; their names suggest count/character caps — confirm at the use sites.
const MAX_TOOL_FAILURES = 8;
const MAX_TOOL_FAILURE_CHARS = 240;
const DEFAULT_RECENT_TURNS_PRESERVE = 3;
// Fallback retry count handed to clampNonNegativeInt by resolveQualityGuardMaxRetries.
const DEFAULT_QUALITY_GUARD_MAX_RETRIES = 1;
const MAX_RECENT_TURNS_PRESERVE = 12;
// Upper bound that resolveQualityGuardMaxRetries applies to the resolved retry count.
const MAX_QUALITY_GUARD_MAX_RETRIES = 3;
const MAX_RECENT_TURN_TEXT_CHARS = 600;
// Maximum number of de-duplicated identifiers returned by extractOpaqueIdentifiers.
const MAX_EXTRACTED_IDENTIFIERS = 12;
const REQUIRED_SUMMARY_SECTIONS = [
"## Decisions",
"## Open TODOs",
@@ -58,6 +61,13 @@ function resolveRecentTurnsPreserve(value: unknown): number {
);
}
/**
 * Resolves the configured quality-guard retry count to a bounded value.
 *
 * The raw value is first passed through `clampNonNegativeInt` with
 * `DEFAULT_QUALITY_GUARD_MAX_RETRIES` as the fallback, and the result is then
 * capped at `MAX_QUALITY_GUARD_MAX_RETRIES`.
 *
 * @param value - Untrusted runtime configuration value.
 * @returns The retry count to use, never above `MAX_QUALITY_GUARD_MAX_RETRIES`.
 */
function resolveQualityGuardMaxRetries(value: unknown): number {
  const requestedRetries = clampNonNegativeInt(value, DEFAULT_QUALITY_GUARD_MAX_RETRIES);
  return Math.min(MAX_QUALITY_GUARD_MAX_RETRIES, requestedRetries);
}
/**
 * Collapses every run of whitespace in `text` to a single space and removes
 * leading and trailing whitespace.
 *
 * @param text - Raw text, possibly multi-line.
 * @returns The single-line, trimmed form of `text`.
 */
function normalizeFailureText(text: string): string {
  const collapsed = text.split(/\s+/).join(" ");
  return collapsed.trim();
}
@@ -275,6 +285,78 @@ function buildCompactionStructureInstructions(customInstructions?: string): stri
return `${sectionsTemplate}\n\nAdditional focus:\n${custom}`;
}
/**
 * Removes surrounding whitespace and wrapping punctuation from a raw identifier match.
 *
 * Leading opening quotes/brackets and trailing closing quotes/brackets or
 * sentence punctuation are stripped so that the identifier can be compared
 * verbatim against summary text.
 *
 * @param value - Raw text matched by the identifier pattern.
 * @returns The identifier without wrapping punctuation (may be empty).
 */
function sanitizeExtractedIdentifier(value: string): string {
  return (
    value
      .trim()
      .replace(/^[("'`[{<]+/, "")
      // "}" is included so that values lifted from JSON or template text
      // (e.g. `https://host/a"}`) are trimmed; without it a trailing "}"
      // shields every punctuation character before it from being removed.
      .replace(/[)\]}"'`,;:.!?<>]+$/, "")
  );
}
/**
 * Collects opaque identifiers from free text for later verbatim checks.
 *
 * The pattern recognises, in order of alternation: hex-like runs of 8+
 * characters, http(s) URLs, slash-prefixed paths, drive-letter paths,
 * dotted `host:port` pairs, and standalone numbers of 6+ digits. Each match is
 * sanitized, matches shorter than 4 characters are dropped, duplicates are
 * removed (first occurrence wins), and the result is capped at
 * `MAX_EXTRACTED_IDENTIFIERS` entries.
 *
 * @param text - Text to scan.
 * @returns Unique identifiers in order of first appearance.
 */
function extractOpaqueIdentifiers(text: string): string[] {
  const identifierPattern =
    /([A-Fa-f0-9]{8,}|https?:\/\/\S+|\/[\w./-]+|[A-Za-z]:\\[\w\\.-]+|[A-Za-z0-9._-]+\.[A-Za-z0-9._/-]+:\d{1,5}|\b\d{6,}\b)/g;
  const seen = new Set<string>();
  for (const rawMatch of text.match(identifierPattern) ?? []) {
    const cleaned = sanitizeExtractedIdentifier(rawMatch);
    if (cleaned.length >= 4) {
      // Set insertion order keeps the first occurrence's position.
      seen.add(cleaned);
    }
  }
  return Array.from(seen).slice(0, MAX_EXTRACTED_IDENTIFIERS);
}
/**
 * Finds the text of the most recent user message that has non-empty text.
 *
 * Messages are scanned from the end of the array towards the start; user
 * messages whose extracted text is falsy are skipped.
 *
 * @param messages - Conversation messages, oldest first.
 * @returns The extracted text of the latest such user message, or `null` if none exists.
 */
function extractLatestUserAsk(messages: AgentMessage[]): string | null {
  let index = messages.length;
  while (index > 0) {
    index -= 1;
    const candidate = messages[index];
    if (candidate.role === "user") {
      const askText = extractMessageText(candidate);
      if (askText) {
        return askText;
      }
    }
  }
  return null;
}
/**
 * Checks whether a summary mentions at least one keyword from the latest user ask.
 *
 * Keywords are the first 8 lower-cased `[a-z0-9]` tokens of length 5 or more
 * taken from `latestAsk`; a keyword counts as present when it occurs anywhere
 * in the lower-cased summary (substring match). The check passes vacuously when
 * there is no ask or the ask yields no keywords.
 *
 * @param summary - Generated summary text.
 * @param latestAsk - Latest user request, or `null` when there is none.
 * @returns `true` if the ask is reflected or there is nothing to check.
 */
function hasAskOverlap(summary: string, latestAsk: string | null): boolean {
  if (!latestAsk) {
    return true;
  }

  const keywords: string[] = [];
  for (const word of latestAsk.toLowerCase().split(/[^a-z0-9]+/g)) {
    if (word.length < 5) {
      continue;
    }
    keywords.push(word);
    if (keywords.length === 8) {
      break;
    }
  }
  if (keywords.length === 0) {
    return true;
  }

  const haystack = summary.toLowerCase();
  for (const keyword of keywords) {
    if (haystack.includes(keyword)) {
      return true;
    }
  }
  return false;
}
/**
 * Audits a generated summary and reports every quality problem found.
 *
 * Three checks run, each contributing machine-readable reason strings:
 * - `missing_section:<heading>` for each entry of `REQUIRED_SUMMARY_SECTIONS`
 *   that does not occur in the summary;
 * - `missing_identifiers:<a,b,c>` listing up to three identifiers that do not
 *   occur verbatim in the summary;
 * - `latest_user_ask_not_reflected` when `hasAskOverlap` fails.
 *
 * @param params.summary - Summary text to audit.
 * @param params.identifiers - Identifiers that must appear verbatim.
 * @param params.latestAsk - Latest user request, or `null`.
 * @returns `ok` is `true` only when `reasons` is empty.
 */
function auditSummaryQuality(params: {
  summary: string;
  identifiers: string[];
  latestAsk: string | null;
}): { ok: boolean; reasons: string[] } {
  const { summary, identifiers, latestAsk } = params;

  const reasons: string[] = REQUIRED_SUMMARY_SECTIONS.filter(
    (heading) => !summary.includes(heading),
  ).map((heading) => `missing_section:${heading}`);

  const absentIdentifiers: string[] = [];
  for (const identifier of identifiers) {
    if (!summary.includes(identifier)) {
      absentIdentifiers.push(identifier);
    }
  }
  if (absentIdentifiers.length > 0) {
    // Only the first three are reported to keep the reason string short.
    const sample = absentIdentifiers.slice(0, 3).join(",");
    reasons.push(`missing_identifiers:${sample}`);
  }

  const askReflected = hasAskOverlap(summary, latestAsk);
  if (!askReflected) {
    reasons.push("latest_user_ask_not_reflected");
  }

  return { ok: reasons.length === 0, reasons };
}
/**
* Read and format critical workspace context for compaction summary.
* Extracts "Session Startup" and "Red Lines" from AGENTS.md.
@@ -358,6 +440,8 @@ export default function compactionSafeguardExtension(api: ExtensionAPI): void {
const turnPrefixMessages = preparation.turnPrefixMessages ?? [];
let messagesToSummarize = preparation.messagesToSummarize;
const recentTurnsPreserve = resolveRecentTurnsPreserve(runtime?.recentTurnsPreserve);
const qualityGuardEnabled = runtime?.qualityGuardEnabled ?? true;
const qualityGuardMaxRetries = resolveQualityGuardMaxRetries(runtime?.qualityGuardMaxRetries);
const maxHistoryShare = runtime?.maxHistoryShare ?? 0.5;
@@ -436,6 +520,17 @@ export default function compactionSafeguardExtension(api: ExtensionAPI): void {
});
messagesToSummarize = summaryTargetMessages;
const preservedTurnsSection = formatPreservedTurnsSection(preservedRecentMessages);
const latestUserAsk = extractLatestUserAsk([
...messagesToSummarize,
...preservedRecentMessages,
...turnPrefixMessages,
]);
const identifierSeedText = [...messagesToSummarize, ...preservedRecentMessages]
.slice(-10)
.map((message) => extractMessageText(message))
.filter(Boolean)
.join("\n");
const identifiers = extractOpaqueIdentifiers(identifierSeedText);
const structuredInstructions = buildCompactionStructureInstructions(customInstructions);
// Use adaptive chunk ratio based on message sizes, reserving headroom for
@@ -453,34 +548,52 @@ export default function compactionSafeguardExtension(api: ExtensionAPI): void {
// incorporates context from pruned messages instead of losing it entirely.
const effectivePreviousSummary = droppedSummary ?? preparation.previousSummary;
const historySummary = await summarizeInStages({
messages: messagesToSummarize,
model,
apiKey,
signal,
reserveTokens,
maxChunkTokens,
contextWindow: contextWindowTokens,
customInstructions: structuredInstructions,
previousSummary: effectivePreviousSummary,
});
let summary = historySummary;
if (preparation.isSplitTurn && turnPrefixMessages.length > 0) {
const prefixSummary = await summarizeInStages({
messages: turnPrefixMessages,
let summary = "";
let currentInstructions = structuredInstructions;
const totalAttempts = qualityGuardEnabled ? qualityGuardMaxRetries + 1 : 1;
for (let attempt = 0; attempt < totalAttempts; attempt += 1) {
const historySummary = await summarizeInStages({
messages: messagesToSummarize,
model,
apiKey,
signal,
reserveTokens,
maxChunkTokens,
contextWindow: contextWindowTokens,
customInstructions: `${TURN_PREFIX_INSTRUCTIONS}\n\n${structuredInstructions}`,
previousSummary: undefined,
customInstructions: currentInstructions,
previousSummary: effectivePreviousSummary,
});
summary = `${historySummary}\n\n---\n\n**Turn Context (split turn):**\n\n${prefixSummary}`;
summary = historySummary;
if (preparation.isSplitTurn && turnPrefixMessages.length > 0) {
const prefixSummary = await summarizeInStages({
messages: turnPrefixMessages,
model,
apiKey,
signal,
reserveTokens,
maxChunkTokens,
contextWindow: contextWindowTokens,
customInstructions: `${TURN_PREFIX_INSTRUCTIONS}\n\n${currentInstructions}`,
previousSummary: undefined,
});
summary = `${historySummary}\n\n---\n\n**Turn Context (split turn):**\n\n${prefixSummary}`;
}
summary += preservedTurnsSection;
if (!qualityGuardEnabled) {
break;
}
const quality = auditSummaryQuality({
summary,
identifiers,
latestAsk: latestUserAsk,
});
if (quality.ok || attempt >= totalAttempts - 1) {
break;
}
const reasons = quality.reasons.join(", ");
currentInstructions = `${structuredInstructions}\n\nPrevious summary failed quality checks (${reasons}). Fix all issues and include every required section with exact identifiers preserved.`;
}
summary += preservedTurnsSection;
summary += toolFailureSection;
summary += fileOpsSummary;
@@ -516,7 +629,10 @@ export const __testing = {
splitPreservedRecentTurns,
formatPreservedTurnsSection,
buildCompactionStructureInstructions,
extractOpaqueIdentifiers,
auditSummaryQuality,
resolveRecentTurnsPreserve,
resolveQualityGuardMaxRetries,
computeAdaptiveChunkRatio,
isOversizedForSummary,
BASE_CHUNK_RATIO,