mirror of
https://github.com/openclaw/openclaw.git
synced 2026-05-21 02:54:59 +00:00
Compaction/Safeguard: add summary quality audit retries
This commit is contained in:
@@ -11,6 +11,8 @@ export type CompactionSafeguardRuntimeValue = {
|
||||
*/
|
||||
model?: Model<Api>;
|
||||
recentTurnsPreserve?: number;
|
||||
qualityGuardEnabled?: boolean;
|
||||
qualityGuardMaxRetries?: number;
|
||||
};
|
||||
|
||||
// Registry instance produced by createSessionManagerRuntimeRegistry, typed to
// hold CompactionSafeguardRuntimeValue entries.
// NOTE(review): presumably keyed per session manager — confirm against the
// registry helper, which is not visible here.
const registry = createSessionManagerRuntimeRegistry<CompactionSafeguardRuntimeValue>();
|
||||
|
||||
@@ -14,7 +14,10 @@ const {
|
||||
splitPreservedRecentTurns,
|
||||
formatPreservedTurnsSection,
|
||||
buildCompactionStructureInstructions,
|
||||
extractOpaqueIdentifiers,
|
||||
auditSummaryQuality,
|
||||
resolveRecentTurnsPreserve,
|
||||
resolveQualityGuardMaxRetries,
|
||||
computeAdaptiveChunkRatio,
|
||||
isOversizedForSummary,
|
||||
BASE_CHUNK_RATIO,
|
||||
@@ -408,6 +411,77 @@ describe("compaction-safeguard recent-turn preservation", () => {
|
||||
expect(instructions).toContain("## Exact identifiers");
|
||||
expect(instructions).toContain("Keep security caveats.");
|
||||
});
|
||||
|
||||
it("extracts opaque identifiers and audits summary quality", () => {
  // Seed text mixing lower/upper-case hex ids, a URL, a file path and a host:port pair.
  const seedText =
    "Track id a1b2c3d4e5f6 plus A1B2C3D4E5F6 and URL https://example.com/a and /tmp/x.log plus port host.local:18789";
  const extracted = extractOpaqueIdentifiers(seedText);
  expect(extracted.length).toBeGreaterThan(0);
  expect(extracted).toContain("A1B2C3D4E5F6");

  // A structured summary that repeats every extracted identifier verbatim
  // and echoes the wording of the latest ask.
  const summaryLines = [
    "## Decisions",
    "Keep current flow.",
    "## Open TODOs",
    "None.",
    "## Constraints/Rules",
    "Preserve identifiers.",
    "## Pending user asks",
    "Explain post-compaction behavior.",
    "## Exact identifiers",
    extracted.join(", "),
  ];

  const audit = auditSummaryQuality({
    summary: summaryLines.join("\n"),
    identifiers: extracted,
    latestAsk: "Explain post-compaction behavior for memory indexing",
  });
  expect(audit.ok).toBe(true);
});
|
||||
|
||||
it("dedupes identifiers before applying the result cap", () => {
  // Ten copies of the same id followed by twelve distinct ids: if duplicates
  // were counted against the cap, most of the distinct ids would be lost.
  const noisyPrefix = new Array<string>(10).fill("a0b0c0d0").join(" ");
  const uniqueTail: string[] = [];
  for (let idx = 0; idx < 12; idx += 1) {
    uniqueTail.push(`b${idx.toString(16).padStart(7, "0")}`);
  }

  const extracted = extractOpaqueIdentifiers([noisyPrefix, ...uniqueTail].join(" "));

  // Expect the repeated id once plus the first eleven distinct ids.
  expect(extracted).toHaveLength(12);
  expect(new Set(extracted).size).toBe(12);
  expect(extracted).toContain("a0b0c0d0");
  expect(extracted).toContain(uniqueTail[10]);
});
|
||||
|
||||
it("filters ordinary short numbers and trims wrapped punctuation", () => {
  const extracted = extractOpaqueIdentifiers(
    "Year 2026 count 42 port 18789 ticket 123456 URL https://example.com/a, path /tmp/x.log.",
  );

  // Digit runs shorter than six characters must not be reported.
  for (const shortNumber of ["2026", "42", "18789"]) {
    expect(extracted).not.toContain(shortNumber);
  }

  // The long ticket number survives, and the URL/path lose their trailing "," and ".".
  for (const expected of ["123456", "https://example.com/a", "/tmp/x.log"]) {
    expect(extracted).toContain(expected);
  }
});
|
||||
|
||||
it("fails quality audit when required sections are missing", () => {
  // An unstructured one-line summary: no section headings, no identifier.
  const { ok, reasons } = auditSummaryQuality({
    summary: "Short summary without structure",
    identifiers: ["abc12345"],
    latestAsk: "Need a status update",
  });

  expect(ok).toBe(false);
  expect(reasons.length).toBeGreaterThan(0);
});
|
||||
|
||||
it("clamps quality-guard retries into a safe range", () => {
  // [raw input, expected resolved retry count]
  const cases: Array<[unknown, number]> = [
    [undefined, 1], // missing value resolves to the default
    [-1, 0], // negatives are raised to zero
    [99, 3], // large values are capped
  ];

  for (const [rawValue, expectedRetries] of cases) {
    expect(resolveQualityGuardMaxRetries(rawValue)).toBe(expectedRetries);
  }
});
|
||||
});
|
||||
|
||||
describe("compaction-safeguard extension model fallback", () => {
|
||||
|
||||
@@ -29,8 +29,11 @@ const TURN_PREFIX_INSTRUCTIONS =
|
||||
// NOTE(review): usage is outside this excerpt; presumably the maximum number
// of tool failures listed in the summary — confirm.
const MAX_TOOL_FAILURES = 8;
|
||||
// NOTE(review): usage is outside this excerpt; presumably a per-failure
// character budget — confirm.
const MAX_TOOL_FAILURE_CHARS = 240;
|
||||
// NOTE(review): presumably the fallback used by resolveRecentTurnsPreserve,
// whose body is not fully visible here — confirm.
const DEFAULT_RECENT_TURNS_PRESERVE = 3;
|
||||
// Fallback passed to clampNonNegativeInt by resolveQualityGuardMaxRetries.
const DEFAULT_QUALITY_GUARD_MAX_RETRIES = 1;
|
||||
// NOTE(review): presumably the upper bound applied by
// resolveRecentTurnsPreserve — confirm.
const MAX_RECENT_TURNS_PRESERVE = 12;
|
||||
// Upper bound applied by resolveQualityGuardMaxRetries via Math.min.
const MAX_QUALITY_GUARD_MAX_RETRIES = 3;
|
||||
// NOTE(review): usage is outside this excerpt; presumably a per-turn text
// length limit for preserved recent turns — confirm.
const MAX_RECENT_TURN_TEXT_CHARS = 600;
|
||||
// Maximum number of distinct identifiers returned by extractOpaqueIdentifiers.
const MAX_EXTRACTED_IDENTIFIERS = 12;
|
||||
const REQUIRED_SUMMARY_SECTIONS = [
|
||||
"## Decisions",
|
||||
"## Open TODOs",
|
||||
@@ -58,6 +61,13 @@ function resolveRecentTurnsPreserve(value: unknown): number {
|
||||
);
|
||||
}
|
||||
|
||||
/**
 * Resolves the quality-guard retry count from an untyped config value.
 *
 * The raw value is first normalized by `clampNonNegativeInt`, with
 * `DEFAULT_QUALITY_GUARD_MAX_RETRIES` as its fallback argument, and the
 * result is then capped at `MAX_QUALITY_GUARD_MAX_RETRIES`.
 *
 * @param value - Raw configured retry count.
 * @returns The smaller of the normalized count and the maximum.
 */
function resolveQualityGuardMaxRetries(value: unknown): number {
  const normalizedRetries = clampNonNegativeInt(value, DEFAULT_QUALITY_GUARD_MAX_RETRIES);
  return Math.min(MAX_QUALITY_GUARD_MAX_RETRIES, normalizedRetries);
}
|
||||
|
||||
/**
 * Flattens text onto one line: every run of whitespace becomes a single
 * space, and leading/trailing whitespace is removed.
 *
 * @param text - Raw text, possibly multi-line.
 * @returns The whitespace-normalized text.
 */
function normalizeFailureText(text: string): string {
  // Splitting on whitespace runs leaves empty strings at the edges when the
  // input starts or ends with whitespace; dropping them performs the trim.
  const words = text.split(/\s+/).filter((word) => word.length > 0);
  return words.join(" ");
}
|
||||
@@ -275,6 +285,78 @@ function buildCompactionStructureInstructions(customInstructions?: string): stri
|
||||
return `${sectionsTemplate}\n\nAdditional focus:\n${custom}`;
|
||||
}
|
||||
|
||||
/**
 * Strips wrapping punctuation from a raw identifier match.
 *
 * After trimming whitespace, leading opening quotes/brackets and trailing
 * closing quotes/brackets/sentence punctuation are removed, so a match lifted
 * out of prose or JSON is reduced to the bare identifier.
 *
 * @param value - Raw matched text.
 * @returns The identifier without surrounding punctuation (may be empty).
 */
function sanitizeExtractedIdentifier(value: string): string {
  return (
    value
      .trim()
      .replace(/^[("'`[{<]+/, "")
      // "}" is included to mirror the leading "{". Without it, a tail such as
      // `"}` is left fully intact, because the end-anchored class cannot get
      // past the brace to reach the quote.
      .replace(/[)\]}"'`,;:.!?<>]+$/, "")
  );
}
|
||||
|
||||
/**
 * Collects opaque identifiers from free text, in order of first appearance.
 *
 * Candidates are: hex runs of 8+ characters, http(s) URLs, slash-prefixed
 * paths, Windows drive paths, dotted `host:port` pairs, and standalone digit
 * runs of 6+ characters. Each candidate is cleaned with
 * `sanitizeExtractedIdentifier`; results shorter than 4 characters are
 * dropped, duplicates are removed, and the list is capped at
 * `MAX_EXTRACTED_IDENTIFIERS`.
 *
 * @param text - Text to scan.
 * @returns Distinct identifiers, at most `MAX_EXTRACTED_IDENTIFIERS` of them.
 */
function extractOpaqueIdentifiers(text: string): string[] {
  const candidatePattern =
    /([A-Fa-f0-9]{8,}|https?:\/\/\S+|\/[\w./-]+|[A-Za-z]:\\[\w\\.-]+|[A-Za-z0-9._-]+\.[A-Za-z0-9._/-]+:\d{1,5}|\b\d{6,}\b)/g;

  // A Set iterates in insertion order, so first-seen order is preserved
  // while duplicates are discarded before the cap is applied.
  const distinct = new Set<string>();
  for (const rawMatch of text.match(candidatePattern) ?? []) {
    const cleaned = sanitizeExtractedIdentifier(rawMatch);
    if (cleaned.length >= 4) {
      distinct.add(cleaned);
    }
  }
  return [...distinct].slice(0, MAX_EXTRACTED_IDENTIFIERS);
}
|
||||
|
||||
/**
 * Finds the text of the most recent user message.
 *
 * Messages are scanned from last to first; the first message with role
 * `"user"` whose extracted text is non-empty wins.
 *
 * @param messages - Messages in chronological order.
 * @returns The latest non-empty user text, or `null` when there is none.
 */
function extractLatestUserAsk(messages: AgentMessage[]): string | null {
  for (let index = messages.length - 1; index >= 0; index--) {
    const candidate = messages[index];
    if (candidate.role === "user") {
      const askText = extractMessageText(candidate);
      // A user message with no usable text is skipped so the scan can fall
      // back to an earlier one.
      if (askText) {
        return askText;
      }
    }
  }
  return null;
}
|
||||
|
||||
/**
 * Checks whether a summary reflects the latest user ask.
 *
 * The ask is lower-cased and split on non-alphanumeric (ASCII) characters;
 * words of 5+ characters are kept, deduplicated, and the first 8 distinct
 * ones are compared against the lower-cased summary by substring match.
 *
 * @param summary - Generated summary text.
 * @param latestAsk - Latest user ask, or `null` when there is none.
 * @returns `true` when there is nothing to check (no ask, or no word of 5+
 *   characters) or when at least one ask word occurs in the summary.
 */
function hasAskOverlap(summary: string, latestAsk: string | null): boolean {
  if (!latestAsk) {
    return true;
  }
  const normalizedSummary = summary.toLowerCase();
  const longWords = latestAsk
    .toLowerCase()
    .split(/[^a-z0-9]+/g)
    .filter((token) => token.length >= 5);
  // Deduplicate before applying the 8-token cap so a repeated word cannot
  // use up the budget and hide distinct words that appear later in the ask.
  const tokens = Array.from(new Set(longWords)).slice(0, 8);
  if (tokens.length === 0) {
    return true;
  }
  return tokens.some((token) => normalizedSummary.includes(token));
}
|
||||
|
||||
/**
 * Audits a compaction summary and lists every check it fails.
 *
 * Checks, in order:
 * - each heading in `REQUIRED_SUMMARY_SECTIONS` occurs in the summary
 *   (`missing_section:<heading>` per missing heading);
 * - each identifier occurs verbatim in the summary (one
 *   `missing_identifiers:` entry naming up to three missing ids);
 * - the latest ask overlaps the summary per `hasAskOverlap`
 *   (`latest_user_ask_not_reflected`).
 *
 * @param params - The summary, the identifiers it must keep, and the latest ask.
 * @returns `ok` is `true` only when `reasons` is empty.
 */
function auditSummaryQuality(params: {
  summary: string;
  identifiers: string[];
  latestAsk: string | null;
}): { ok: boolean; reasons: string[] } {
  const { summary, identifiers, latestAsk } = params;

  const reasons: string[] = REQUIRED_SUMMARY_SECTIONS.filter(
    (heading) => !summary.includes(heading),
  ).map((heading) => `missing_section:${heading}`);

  const absentIdentifiers = identifiers.filter((id) => !summary.includes(id));
  if (absentIdentifiers.length > 0) {
    // Only the first three are named, to keep the reason string short.
    reasons.push(`missing_identifiers:${absentIdentifiers.slice(0, 3).join(",")}`);
  }

  if (!hasAskOverlap(summary, latestAsk)) {
    reasons.push("latest_user_ask_not_reflected");
  }

  return { ok: reasons.length === 0, reasons };
}
|
||||
|
||||
/**
|
||||
* Read and format critical workspace context for compaction summary.
|
||||
* Extracts "Session Startup" and "Red Lines" from AGENTS.md.
|
||||
@@ -358,6 +440,8 @@ export default function compactionSafeguardExtension(api: ExtensionAPI): void {
|
||||
const turnPrefixMessages = preparation.turnPrefixMessages ?? [];
|
||||
let messagesToSummarize = preparation.messagesToSummarize;
|
||||
const recentTurnsPreserve = resolveRecentTurnsPreserve(runtime?.recentTurnsPreserve);
|
||||
const qualityGuardEnabled = runtime?.qualityGuardEnabled ?? true;
|
||||
const qualityGuardMaxRetries = resolveQualityGuardMaxRetries(runtime?.qualityGuardMaxRetries);
|
||||
|
||||
const maxHistoryShare = runtime?.maxHistoryShare ?? 0.5;
|
||||
|
||||
@@ -436,6 +520,17 @@ export default function compactionSafeguardExtension(api: ExtensionAPI): void {
|
||||
});
|
||||
messagesToSummarize = summaryTargetMessages;
|
||||
const preservedTurnsSection = formatPreservedTurnsSection(preservedRecentMessages);
|
||||
const latestUserAsk = extractLatestUserAsk([
|
||||
...messagesToSummarize,
|
||||
...preservedRecentMessages,
|
||||
...turnPrefixMessages,
|
||||
]);
|
||||
const identifierSeedText = [...messagesToSummarize, ...preservedRecentMessages]
|
||||
.slice(-10)
|
||||
.map((message) => extractMessageText(message))
|
||||
.filter(Boolean)
|
||||
.join("\n");
|
||||
const identifiers = extractOpaqueIdentifiers(identifierSeedText);
|
||||
const structuredInstructions = buildCompactionStructureInstructions(customInstructions);
|
||||
|
||||
// Use adaptive chunk ratio based on message sizes, reserving headroom for
|
||||
@@ -453,34 +548,52 @@ export default function compactionSafeguardExtension(api: ExtensionAPI): void {
|
||||
// incorporates context from pruned messages instead of losing it entirely.
|
||||
const effectivePreviousSummary = droppedSummary ?? preparation.previousSummary;
|
||||
|
||||
const historySummary = await summarizeInStages({
|
||||
messages: messagesToSummarize,
|
||||
model,
|
||||
apiKey,
|
||||
signal,
|
||||
reserveTokens,
|
||||
maxChunkTokens,
|
||||
contextWindow: contextWindowTokens,
|
||||
customInstructions: structuredInstructions,
|
||||
previousSummary: effectivePreviousSummary,
|
||||
});
|
||||
|
||||
let summary = historySummary;
|
||||
if (preparation.isSplitTurn && turnPrefixMessages.length > 0) {
|
||||
const prefixSummary = await summarizeInStages({
|
||||
messages: turnPrefixMessages,
|
||||
let summary = "";
|
||||
let currentInstructions = structuredInstructions;
|
||||
const totalAttempts = qualityGuardEnabled ? qualityGuardMaxRetries + 1 : 1;
|
||||
for (let attempt = 0; attempt < totalAttempts; attempt += 1) {
|
||||
const historySummary = await summarizeInStages({
|
||||
messages: messagesToSummarize,
|
||||
model,
|
||||
apiKey,
|
||||
signal,
|
||||
reserveTokens,
|
||||
maxChunkTokens,
|
||||
contextWindow: contextWindowTokens,
|
||||
customInstructions: `${TURN_PREFIX_INSTRUCTIONS}\n\n${structuredInstructions}`,
|
||||
previousSummary: undefined,
|
||||
customInstructions: currentInstructions,
|
||||
previousSummary: effectivePreviousSummary,
|
||||
});
|
||||
summary = `${historySummary}\n\n---\n\n**Turn Context (split turn):**\n\n${prefixSummary}`;
|
||||
|
||||
summary = historySummary;
|
||||
if (preparation.isSplitTurn && turnPrefixMessages.length > 0) {
|
||||
const prefixSummary = await summarizeInStages({
|
||||
messages: turnPrefixMessages,
|
||||
model,
|
||||
apiKey,
|
||||
signal,
|
||||
reserveTokens,
|
||||
maxChunkTokens,
|
||||
contextWindow: contextWindowTokens,
|
||||
customInstructions: `${TURN_PREFIX_INSTRUCTIONS}\n\n${currentInstructions}`,
|
||||
previousSummary: undefined,
|
||||
});
|
||||
summary = `${historySummary}\n\n---\n\n**Turn Context (split turn):**\n\n${prefixSummary}`;
|
||||
}
|
||||
summary += preservedTurnsSection;
|
||||
if (!qualityGuardEnabled) {
|
||||
break;
|
||||
}
|
||||
const quality = auditSummaryQuality({
|
||||
summary,
|
||||
identifiers,
|
||||
latestAsk: latestUserAsk,
|
||||
});
|
||||
if (quality.ok || attempt >= totalAttempts - 1) {
|
||||
break;
|
||||
}
|
||||
const reasons = quality.reasons.join(", ");
|
||||
currentInstructions = `${structuredInstructions}\n\nPrevious summary failed quality checks (${reasons}). Fix all issues and include every required section with exact identifiers preserved.`;
|
||||
}
|
||||
summary += preservedTurnsSection;
|
||||
|
||||
summary += toolFailureSection;
|
||||
summary += fileOpsSummary;
|
||||
@@ -516,7 +629,10 @@ export const __testing = {
|
||||
splitPreservedRecentTurns,
|
||||
formatPreservedTurnsSection,
|
||||
buildCompactionStructureInstructions,
|
||||
extractOpaqueIdentifiers,
|
||||
auditSummaryQuality,
|
||||
resolveRecentTurnsPreserve,
|
||||
resolveQualityGuardMaxRetries,
|
||||
computeAdaptiveChunkRatio,
|
||||
isOversizedForSummary,
|
||||
BASE_CHUNK_RATIO,
|
||||
|
||||
Reference in New Issue
Block a user