fix: carry observed overflow token counts into compaction

- parse provider-reported overflow token counts from prompt errors
- pass observed live counts into context-engine compaction
- forward live counts through the legacy context-engine bridge
- classify live-over-target failures distinctly in compaction diagnostics
- cover the new parser and overflow plumbing with focused tests
This commit is contained in:
rabsef-bicyrm
2026-03-08 16:36:12 -07:00
committed by Josh Lehman
parent f2e28fc30f
commit 26ff413914
8 changed files with 100 additions and 1 deletions

View File

@@ -2,6 +2,7 @@ import { describe, expect, it } from "vitest";
import {
classifyFailoverReason,
classifyFailoverReasonFromHttpStatus,
extractObservedOverflowTokenCount,
isAuthErrorMessage,
isAuthPermanentErrorMessage,
isBillingErrorMessage,
@@ -461,6 +462,29 @@ describe("isLikelyContextOverflowError", () => {
});
});
// Exercises extractObservedOverflowTokenCount against the error phrasings it recognises
// and against messages that carry no token count at all.
describe("extractObservedOverflowTokenCount", () => {
  it("extracts provider-reported prompt token counts", () => {
    // Each row pairs a raw error message with the token count expected from it.
    const samples: Array<[string, number]> = [
      [
        '400 {"type":"error","error":{"message":"prompt is too long: 277403 tokens > 200000 maximum"}}',
        277403,
      ],
      ["Context window exceeded: requested 12000 tokens", 12000],
      [
        "This model's maximum context length is 128000 tokens. However, your messages resulted in 145000 tokens.",
        145000,
      ],
    ];
    for (const [message, expectedCount] of samples) {
      expect(extractObservedOverflowTokenCount(message)).toBe(expectedCount);
    }
  });

  it("returns undefined when overflow counts are not present", () => {
    const messagesWithoutCounts = ["Prompt too large for this model", "rate limit exceeded"];
    for (const message of messagesWithoutCounts) {
      expect(extractObservedOverflowTokenCount(message)).toBeUndefined();
    }
  });
});
describe("isTransientHttpError", () => {
it("returns true for retryable 5xx status codes", () => {
expect(isTransientHttpError("499 Client Closed Request")).toBe(true);

View File

@@ -22,6 +22,7 @@ export {
isAuthPermanentErrorMessage,
isModelNotFoundErrorMessage,
isBillingAssistantError,
extractObservedOverflowTokenCount,
parseApiErrorInfo,
sanitizeUserFacingText,
isBillingErrorMessage,

View File

@@ -185,6 +185,32 @@ export function isCompactionFailureError(errorMessage?: string): boolean {
return lower.includes("context overflow");
}
/**
 * Error phrasings that embed a token count. Capture group 1 of every pattern
 * is the count to extract (digits, optionally comma-grouped). Patterns are
 * tried in array order.
 */
const OBSERVED_OVERFLOW_TOKEN_PATTERNS = [
  /prompt is too long:\s*([\d,]+)\s+tokens\s*>\s*[\d,]+\s+maximum/i,
  /requested\s+([\d,]+)\s+tokens/i,
  /resulted in\s+([\d,]+)\s+tokens/i,
];

/**
 * Pulls the token count stated in an overflow error message.
 *
 * @param errorMessage - Raw error text; may be absent or empty.
 * @returns The first positive count found by the known patterns, or
 *   `undefined` when the message is missing or no pattern yields a positive
 *   number.
 */
export function extractObservedOverflowTokenCount(errorMessage?: string): number | undefined {
  if (errorMessage) {
    for (const tokenPattern of OBSERVED_OVERFLOW_TOKEN_PATTERNS) {
      const captured = tokenPattern.exec(errorMessage)?.[1];
      if (captured === undefined) {
        continue;
      }
      // Drop thousands separators; a capture made only of commas leaves nothing to parse.
      const digits = captured.split(",").join("");
      if (digits.length === 0) {
        continue;
      }
      const tokenCount = Number(digits);
      // A zero or unparsable count falls through to the next pattern.
      if (Number.isFinite(tokenCount) && tokenCount > 0) {
        return Math.floor(tokenCount);
      }
    }
  }
  return undefined;
}
// Matches a leading error label at the start of a string ("error", "api error",
// "apierror", "openai error", "anthropic error" or "gateway error", in any
// letter case) together with the run of colons, whitespace and hyphens after it.
const ERROR_PAYLOAD_PREFIX_RE =
/^(?:error|api\s*error|apierror|openai\s*error|anthropic\s*error|gateway\s*error)[:\s-]+/i;
// Matches every opening or closing `final` tag (`<final>` / `</final>`), allowing
// whitespace inside the angle brackets; global and case-insensitive.
const FINAL_TAG_RE = /<\s*\/?\s*final\s*>/gi;

View File

@@ -114,6 +114,7 @@ export type CompactEmbeddedPiSessionParams = {
/** Whether the sender is an owner (required for owner-only tools). */
senderIsOwner?: boolean;
sessionFile: string;
currentTokenCount?: number;
workspaceDir: string;
agentDir?: string;
config?: OpenClawConfig;
@@ -228,6 +229,9 @@ function classifyCompactionReason(reason?: string): string {
if (text.includes("already compacted")) {
return "already_compacted_recently";
}
if (text.includes("still exceeds target")) {
return "live_context_still_exceeds_target";
}
if (text.includes("guard")) {
return "guard_blocked";
}

View File

@@ -109,13 +109,21 @@ vi.mock("../workspace-run.js", () => ({
vi.mock("../pi-embedded-helpers.js", () => ({
formatBillingErrorMessage: vi.fn(() => ""),
classifyFailoverReason: vi.fn(() => null),
extractObservedOverflowTokenCount: vi.fn((msg?: string) => {
const match = msg?.match(/prompt is too long:\s*(\d+)\s+tokens/i);
return match?.[1] ? Number(match[1]) : undefined;
}),
formatAssistantErrorText: vi.fn(() => ""),
isAuthAssistantError: vi.fn(() => false),
isBillingAssistantError: vi.fn(() => false),
isCompactionFailureError: vi.fn(() => false),
isLikelyContextOverflowError: vi.fn((msg?: string) => {
const lower = (msg ?? "").toLowerCase();
return lower.includes("request_too_large") || lower.includes("context window exceeded");
return (
lower.includes("request_too_large") ||
lower.includes("context window exceeded") ||
lower.includes("prompt is too long")
);
}),
isFailoverAssistantError: vi.fn(() => false),
isFailoverErrorMessage: vi.fn(() => false),

View File

@@ -111,6 +111,32 @@ describe("runEmbeddedPiAgent overflow compaction trigger routing", () => {
);
});
// The first attempt fails with an error that states the prompt's token count; the
// run is expected to hand that count to compaction and then finish without error.
it("passes observed overflow token counts into compaction when providers report them", async () => {
  const providerMessage =
    '400 {"type":"error","error":{"type":"invalid_request_error","message":"prompt is too long: 277403 tokens > 200000 maximum"}}';
  const providerOverflow = new Error(providerMessage);

  // Queue one failing attempt followed by a clean retry.
  mockedRunEmbeddedAttempt
    .mockResolvedValueOnce(makeAttemptResult({ promptError: providerOverflow }))
    .mockResolvedValueOnce(makeAttemptResult({ promptError: null }));

  const compactionOutcome = makeCompactionSuccess({
    summary: "Compacted session",
    firstKeptEntryId: "entry-8",
    tokensBefore: 277403,
  });
  mockedCompactDirect.mockResolvedValueOnce(compactionOutcome);

  const { meta } = await runEmbeddedPiAgent(overflowBaseRunParams);

  // The count parsed from the error text must appear in the compaction call.
  const paramsWithObservedCount = expect.objectContaining({ currentTokenCount: 277403 });
  expect(mockedCompactDirect).toHaveBeenCalledWith(paramsWithObservedCount);
  expect(meta.error).toBeUndefined();
});
it("does not reset compaction attempt budget after successful tool-result truncation", async () => {
const overflowError = queueOverflowAttemptWithOversizedToolOutput(
mockedRunEmbeddedAttempt,

View File

@@ -40,6 +40,7 @@ import { ensureOpenClawModelsJson } from "../models-config.js";
import {
formatBillingErrorMessage,
classifyFailoverReason,
extractObservedOverflowTokenCount,
formatAssistantErrorText,
isAuthAssistantError,
isBillingAssistantError,
@@ -988,11 +989,13 @@ export async function runEmbeddedPiAgent(
const overflowDiagId = createCompactionDiagId();
const errorText = contextOverflowError.text;
const msgCount = attempt.messagesSnapshot?.length ?? 0;
const observedOverflowTokens = extractObservedOverflowTokenCount(errorText);
log.warn(
`[context-overflow-diag] sessionKey=${params.sessionKey ?? params.sessionId} ` +
`provider=${provider}/${modelId} source=${contextOverflowError.source} ` +
`messages=${msgCount} sessionFile=${params.sessionFile} ` +
`diagId=${overflowDiagId} compactionAttempts=${overflowCompactionAttempts} ` +
`observedTokens=${observedOverflowTokens ?? "unknown"} ` +
`error=${errorText.slice(0, 200)}`,
);
const isCompactionFailure = isCompactionFailureError(errorText);
@@ -1052,6 +1055,9 @@ export async function runEmbeddedPiAgent(
sessionId: params.sessionId,
sessionFile: params.sessionFile,
tokenBudget: ctxInfo.tokens,
...(observedOverflowTokens !== undefined
? { currentTokenCount: observedOverflowTokens }
: {}),
force: true,
compactionTarget: "budget",
runtimeContext: {
@@ -1074,6 +1080,9 @@ export async function runEmbeddedPiAgent(
extraSystemPrompt: params.extraSystemPrompt,
ownerNumbers: params.ownerNumbers,
trigger: "overflow",
...(observedOverflowTokens !== undefined
? { currentTokenCount: observedOverflowTokens }
: {}),
diagId: overflowDiagId,
attempt: overflowCompactionAttempts,
maxAttempts: MAX_OVERFLOW_COMPACTION_ATTEMPTS,

View File

@@ -85,6 +85,7 @@ export class LegacyContextEngine implements ContextEngine {
sessionId: params.sessionId,
sessionFile: params.sessionFile,
tokenBudget: params.tokenBudget,
currentTokenCount: params.currentTokenCount,
force: params.force,
customInstructions: params.customInstructions,
workspaceDir: (runtimeContext.workspaceDir as string) ?? process.cwd(),