fix(agents): stabilize overflow compaction retries and session context accounting (openclaw#14102) thanks @vpesh

Verified:
- CI checks for commit 86a7ecb45e
- Rebase conflict resolution for compatibility with latest main

Co-authored-by: vpesh <9496634+vpesh@users.noreply.github.com>
Author: Vladimir Peshekhonov
Date: 2026-02-13 00:53:13 +01:00
Committed by: GitHub
Parent: da55d70fb0
Commit: 957b883082
13 changed files with 148 additions and 21 deletions

View File

@@ -6,6 +6,7 @@ describe("isCompactionFailureError", () => {
'Context overflow: Summarization failed: 400 {"message":"prompt is too long"}',
"auto-compaction failed due to context overflow",
"Compaction failed: prompt is too long",
"Summarization failed: context window exceeded for this request",
];
for (const sample of samples) {
expect(isCompactionFailureError(sample)).toBe(true);

View File

@@ -30,6 +30,8 @@ describe("isLikelyContextOverflowError", () => {
"too many requests",
"429 Too Many Requests",
"exceeded your current quota",
"This request would exceed your account's rate limit",
"429 Too Many Requests: request exceeds rate limit",
];
for (const sample of samples) {
expect(isLikelyContextOverflowError(sample)).toBe(false);

View File

@@ -38,7 +38,9 @@ export function isContextOverflowError(errorMessage?: string): boolean {
const CONTEXT_WINDOW_TOO_SMALL_RE = /context window.*(too small|minimum is)/i;
const CONTEXT_OVERFLOW_HINT_RE =
-/context.*overflow|context window.*(too (?:large|long)|exceed|over|limit|max(?:imum)?|requested|sent|tokens)|(?:prompt|request|input).*(too (?:large|long)|exceed|over|limit|max(?:imum)?)/i;
+/context.*overflow|context window.*(too (?:large|long)|exceed|over|limit|max(?:imum)?|requested|sent|tokens)|prompt.*(too (?:large|long)|exceed|over|limit|max(?:imum)?)|(?:request|input).*(?:context|window|length|token).*(too (?:large|long)|exceed|over|limit|max(?:imum)?)/i;
+const RATE_LIMIT_HINT_RE =
+/rate limit|too many requests|requests per (?:minute|hour|day)|quota|throttl|429\b/i;
export function isLikelyContextOverflowError(errorMessage?: string): boolean {
if (!errorMessage) {
@@ -56,6 +58,9 @@ export function isLikelyContextOverflowError(errorMessage?: string): boolean {
if (isContextOverflowError(errorMessage)) {
return true;
}
+if (RATE_LIMIT_HINT_RE.test(errorMessage)) {
+return false;
+}
return CONTEXT_OVERFLOW_HINT_RE.test(errorMessage);
}
@@ -72,9 +77,13 @@ export function isCompactionFailureError(errorMessage?: string): boolean {
if (!hasCompactionTerm) {
return false;
}
-// For compaction failures, also accept "context overflow" without colon
-// since the error message itself describes a compaction/summarization failure
-return isContextOverflowError(errorMessage) || lower.includes("context overflow");
+// Treat any likely overflow shape as a compaction failure when compaction terms are present.
+// Providers often vary wording (e.g. "context window exceeded") across APIs.
+if (isLikelyContextOverflowError(errorMessage)) {
+return true;
+}
+// Keep explicit fallback for bare "context overflow" strings.
+return lower.includes("context overflow");
}
const ERROR_PAYLOAD_PREFIX_RE =
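
For orientation, a minimal sketch of the classification order these hunks establish (assumed shape, not the project's verbatim code; sample strings come from the tests above):

// TypeScript sketch: definite overflow wording wins first, then rate-limit
// wording vetoes the broader hints so 429-style errors are never treated as overflow.
function sketchLikelyOverflow(msg: string): boolean {
  // Stand-in for isContextOverflowError: unmistakable overflow phrasing.
  if (/context overflow:/i.test(msg)) return true;
  // New guard from this commit: rate-limit shapes are excluded outright.
  if (/rate limit|too many requests|quota|429\b/i.test(msg)) return false;
  // Broad hints, mirroring CONTEXT_OVERFLOW_HINT_RE in spirit.
  return /context window.*exceed|prompt.*too (?:large|long)/i.test(msg);
}

sketchLikelyOverflow("Summarization failed: context window exceeded for this request"); // true
sketchLikelyOverflow("429 Too Many Requests: request exceeds rate limit"); // false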

View File

@@ -87,7 +87,21 @@ vi.mock("../failover-error.js", () => ({
}));
vi.mock("../usage.js", () => ({
-normalizeUsage: vi.fn(() => undefined),
+normalizeUsage: vi.fn((usage?: unknown) =>
+usage && typeof usage === "object" ? usage : undefined,
+),
+derivePromptTokens: vi.fn(
+(usage?: { input?: number; cacheRead?: number; cacheWrite?: number }) => {
+if (!usage) {
+return undefined;
+}
+const input = usage.input ?? 0;
+const cacheRead = usage.cacheRead ?? 0;
+const cacheWrite = usage.cacheWrite ?? 0;
+const sum = input + cacheRead + cacheWrite;
+return sum > 0 ? sum : undefined;
+},
+),
hasNonzeroUsage: vi.fn(() => false),
}));
@@ -143,6 +157,18 @@ vi.mock("../pi-embedded-helpers.js", async () => {
const lower = msg.toLowerCase();
return lower.includes("request_too_large") || lower.includes("request size exceeds");
},
+isLikelyContextOverflowError: (msg?: string) => {
+if (!msg) {
+return false;
+}
+const lower = msg.toLowerCase();
+return (
+lower.includes("request_too_large") ||
+lower.includes("request size exceeds") ||
+lower.includes("context window exceeded") ||
+lower.includes("prompt too large")
+);
+},
isFailoverAssistantError: vi.fn(() => false),
isFailoverErrorMessage: vi.fn(() => false),
isAuthAssistantError: vi.fn(() => false),
@@ -249,6 +275,31 @@ describe("overflow compaction in run loop", () => {
expect(result.meta.error).toBeUndefined();
});
it("retries after successful compaction on likely-overflow promptError variants", async () => {
const overflowHintError = new Error("Context window exceeded: requested 12000 tokens");
mockedRunEmbeddedAttempt
.mockResolvedValueOnce(makeAttemptResult({ promptError: overflowHintError }))
.mockResolvedValueOnce(makeAttemptResult({ promptError: null }));
mockedCompactDirect.mockResolvedValueOnce({
ok: true,
compacted: true,
result: {
summary: "Compacted session",
firstKeptEntryId: "entry-6",
tokensBefore: 140000,
},
});
const result = await runEmbeddedPiAgent(baseParams);
expect(mockedCompactDirect).toHaveBeenCalledTimes(1);
expect(mockedRunEmbeddedAttempt).toHaveBeenCalledTimes(2);
expect(log.warn).toHaveBeenCalledWith(expect.stringContaining("source=promptError"));
expect(result.meta.error).toBeUndefined();
});
it("returns error if compaction fails", async () => {
const overflowError = new Error("request_too_large: Request size exceeds model context window");
@@ -433,4 +484,31 @@ describe("overflow compaction in run loop", () => {
expect(mockedCompactDirect).not.toHaveBeenCalled();
expect(log.warn).not.toHaveBeenCalledWith(expect.stringContaining("source=assistantError"));
});
it("sets promptTokens from the latest model call usage, not accumulated attempt usage", async () => {
mockedRunEmbeddedAttempt.mockResolvedValue(
makeAttemptResult({
attemptUsage: {
input: 4_000,
cacheRead: 120_000,
cacheWrite: 0,
total: 124_000,
},
lastAssistant: {
stopReason: "end_turn",
usage: {
input: 900,
cacheRead: 1_100,
cacheWrite: 0,
total: 2_000,
},
} as EmbeddedRunAttemptResult["lastAssistant"],
}),
);
const result = await runEmbeddedPiAgent(baseParams);
expect(result.meta.agentMeta?.usage?.input).toBe(4_000);
expect(result.meta.agentMeta?.promptTokens).toBe(2_000);
});
});

View File

@@ -34,7 +34,7 @@ import {
isAuthAssistantError,
isBillingAssistantError,
isCompactionFailureError,
-isContextOverflowError,
+isLikelyContextOverflowError,
isFailoverAssistantError,
isFailoverErrorMessage,
parseImageSizeError,
@@ -44,7 +44,7 @@ import {
pickFallbackThinkingLevel,
type FailoverReason,
} from "../pi-embedded-helpers.js";
-import { normalizeUsage, type UsageLike } from "../usage.js";
+import { derivePromptTokens, normalizeUsage, type UsageLike } from "../usage.js";
import { redactRunIdentifier, resolveRunWorkspaceDir } from "../workspace-run.js";
import { compactEmbeddedPiSessionDirect } from "./compact.js";
import { resolveGlobalLane, resolveSessionLane } from "./lanes.js";
@@ -408,6 +408,7 @@ export async function runEmbeddedPiAgent(
let overflowCompactionAttempts = 0;
let toolResultTruncationAttempted = false;
const usageAccumulator = createUsageAccumulator();
+let lastRunPromptUsage: ReturnType<typeof normalizeUsage> | undefined;
let autoCompactionCount = 0;
try {
while (true) {
@@ -475,10 +476,12 @@ export async function runEmbeddedPiAgent(
});
const { aborted, promptError, timedOut, sessionIdUsed, lastAssistant } = attempt;
-mergeUsageIntoAccumulator(
-usageAccumulator,
-attempt.attemptUsage ?? normalizeUsage(lastAssistant?.usage as UsageLike),
-);
+const lastAssistantUsage = normalizeUsage(lastAssistant?.usage as UsageLike);
+const attemptUsage = attempt.attemptUsage ?? lastAssistantUsage;
+mergeUsageIntoAccumulator(usageAccumulator, attemptUsage);
+// Keep prompt size from the latest model call so session totalTokens
+// reflects current context usage, not accumulated tool-loop usage.
+lastRunPromptUsage = lastAssistantUsage ?? attemptUsage;
autoCompactionCount += Math.max(0, attempt.compactionCount ?? 0);
const formattedAssistantErrorText = lastAssistant
? formatAssistantErrorText(lastAssistant, {
@@ -496,14 +499,14 @@ export async function runEmbeddedPiAgent(
? (() => {
if (promptError) {
const errorText = describeUnknownError(promptError);
-if (isContextOverflowError(errorText)) {
+if (isLikelyContextOverflowError(errorText)) {
return { text: errorText, source: "promptError" as const };
}
// Prompt submission failed with a non-overflow error. Do not
// inspect prior assistant errors from history for this attempt.
return null;
}
-if (assistantErrorText && isContextOverflowError(assistantErrorText)) {
+if (assistantErrorText && isLikelyContextOverflowError(assistantErrorText)) {
return { text: assistantErrorText, source: "assistantError" as const };
}
return null;
@@ -826,12 +829,14 @@ export async function runEmbeddedPiAgent(
// overstates the actual context size. `lastCallUsage` reflects only
// the final call, giving an accurate snapshot of current context.
const lastCallUsage = normalizeUsage(lastAssistant?.usage as UsageLike);
+const promptTokens = derivePromptTokens(lastRunPromptUsage);
const agentMeta: EmbeddedPiAgentMeta = {
sessionId: sessionIdUsed,
provider: lastAssistant?.provider ?? provider,
model: lastAssistant?.model ?? model.id,
usage,
lastCallUsage: lastCallUsage ?? undefined,
+promptTokens,
compactionCount: autoCompactionCount > 0 ? autoCompactionCount : undefined,
};
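
To motivate the accounting change with concrete numbers (illustrative only, not taken from the codebase): a single attempt whose tool loop makes three model calls re-counts the prompt on every call, so the accumulated figure overstates what is actually in context.

// TypeScript sketch with hypothetical per-call usage:
const calls = [
  { input: 40_000, cacheRead: 0 },
  { input: 1_000, cacheRead: 39_500 },
  { input: 1_200, cacheRead: 40_300 },
];
// Accumulated attempt usage counts the prompt three times over:
const accumulated = calls.reduce((sum, c) => sum + c.input + c.cacheRead, 0); // 122_000
// The final call alone reflects the context currently in play:
const last = calls[calls.length - 1];
const lastCallPrompt = last.input + last.cacheRead; // 41_500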

View File

@@ -6,6 +6,7 @@ export type EmbeddedPiAgentMeta = {
provider: string;
model: string;
compactionCount?: number;
+promptTokens?: number;
usage?: {
input?: number;
output?: number;

View File

@@ -74,4 +74,19 @@ describe("normalizeUsage", () => {
}),
).toBe(1_550);
});
it("prefers explicit prompt token overrides", () => {
expect(
deriveSessionTotalTokens({
usage: {
input: 1_200,
cacheRead: 300,
cacheWrite: 50,
total: 9_999,
},
promptTokens: 65_000,
contextTokens: 200_000,
}),
).toBe(65_000);
});
});

View File

@@ -112,18 +112,24 @@ export function deriveSessionTotalTokens(params: {
cacheWrite?: number;
};
contextTokens?: number;
+promptTokens?: number;
}): number | undefined {
+const promptOverride = params.promptTokens;
+const hasPromptOverride =
+typeof promptOverride === "number" && Number.isFinite(promptOverride) && promptOverride > 0;
const usage = params.usage;
-if (!usage) {
+if (!usage && !hasPromptOverride) {
return undefined;
}
-const input = usage.input ?? 0;
-const promptTokens = derivePromptTokens({
-input: usage.input,
-cacheRead: usage.cacheRead,
-cacheWrite: usage.cacheWrite,
-});
-let total = promptTokens ?? usage.total ?? input;
+const input = usage?.input ?? 0;
+const promptTokens = hasPromptOverride
+? promptOverride
+: derivePromptTokens({
+input: usage?.input,
+cacheRead: usage?.cacheRead,
+cacheWrite: usage?.cacheWrite,
+});
+let total = promptTokens ?? usage?.total ?? input;
if (!(total > 0)) {
return undefined;
}
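
Assuming the truncated tail of the function is otherwise unchanged, calls would behave like this (values invented for illustration; the sums are consistent with the 1_550 expectation and the new test above):

deriveSessionTotalTokens({
  usage: { input: 1_200, cacheRead: 300, cacheWrite: 50 },
  contextTokens: 200_000,
}); // 1_550: summed prompt-side usage, as before
deriveSessionTotalTokens({
  usage: { input: 1_200, cacheRead: 300, cacheWrite: 50 },
  promptTokens: 65_000,
  contextTokens: 200_000,
}); // 65_000: the explicit override wins
deriveSessionTotalTokens({ promptTokens: 65_000, contextTokens: 200_000 }); // 65_000: usage may now be absent entirely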

View File

@@ -371,6 +371,7 @@ export async function runReplyAgent(params: {
}
const usage = runResult.meta.agentMeta?.usage;
+const promptTokens = runResult.meta.agentMeta?.promptTokens;
const modelUsed = runResult.meta.agentMeta?.model ?? fallbackModel ?? defaultModel;
const providerUsed =
runResult.meta.agentMeta?.provider ?? fallbackProvider ?? followupRun.run.provider;
@@ -388,6 +389,7 @@ export async function runReplyAgent(params: {
sessionKey,
usage,
lastCallUsage: runResult.meta.agentMeta?.lastCallUsage,
+promptTokens,
modelUsed,
providerUsed,
contextTokensUsed,

View File

@@ -194,6 +194,7 @@ export function createFollowupRunner(params: {
}
const usage = runResult.meta.agentMeta?.usage;
+const promptTokens = runResult.meta.agentMeta?.promptTokens;
const modelUsed = runResult.meta.agentMeta?.model ?? fallbackModel ?? defaultModel;
const contextTokensUsed =
agentCfgContextTokens ??
@@ -207,6 +208,7 @@ export function createFollowupRunner(params: {
sessionKey,
usage,
lastCallUsage: runResult.meta.agentMeta?.lastCallUsage,
+promptTokens,
modelUsed,
providerUsed: fallbackProvider,
contextTokensUsed,

View File

@@ -25,6 +25,7 @@ export async function persistSessionUsageUpdate(params: {
modelUsed?: string;
providerUsed?: string;
contextTokensUsed?: number;
+promptTokens?: number;
systemPromptReport?: SessionSystemPromptReport;
cliSessionId?: string;
logLabel?: string;
@@ -56,6 +57,7 @@ export async function persistSessionUsageUpdate(params: {
deriveSessionTotalTokens({
usage: usageForContext,
contextTokens: resolvedContextTokens,
+promptTokens: params.promptTokens,
}) ?? input,
modelProvider: params.providerUsed ?? entry.modelProvider,
model: params.modelUsed ?? entry.model,

View File

@@ -37,6 +37,7 @@ export async function updateSessionStoreAfterAgentRun(params: {
} = params;
const usage = result.meta.agentMeta?.usage;
+const promptTokens = result.meta.agentMeta?.promptTokens;
const compactionsThisRun = Math.max(0, result.meta.agentMeta?.compactionCount ?? 0);
const modelUsed = result.meta.agentMeta?.model ?? fallbackModel ?? defaultModel;
const providerUsed = result.meta.agentMeta?.provider ?? fallbackProvider ?? defaultProvider;
@@ -71,6 +72,7 @@ export async function updateSessionStoreAfterAgentRun(params: {
deriveSessionTotalTokens({
usage,
contextTokens,
+promptTokens,
}) ?? input;
}
if (compactionsThisRun > 0) {

View File

@@ -456,6 +456,7 @@ export async function runCronIsolatedAgentTurn(params: {
// Update token+model fields in the session store.
{
const usage = runResult.meta.agentMeta?.usage;
+const promptTokens = runResult.meta.agentMeta?.promptTokens;
const modelUsed = runResult.meta.agentMeta?.model ?? fallbackModel ?? model;
const providerUsed = runResult.meta.agentMeta?.provider ?? fallbackProvider ?? provider;
const contextTokens =
@@ -479,6 +480,7 @@ export async function runCronIsolatedAgentTurn(params: {
deriveSessionTotalTokens({
usage,
contextTokens,
+promptTokens,
}) ?? input;
}
await persistSessionEntry();
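
Taken together, the remaining hunks plumb the new field end to end. Roughly (a simplified composite of the call sites in this diff, not new API):

// runEmbeddedPiAgent derives agentMeta.promptTokens from the last model call;
// each runner (reply, followup, session-store update, cron) reads it off the
// run result and hands it to deriveSessionTotalTokens, where it overrides the
// summed usage whenever it is a positive finite number.
const promptTokens = runResult.meta.agentMeta?.promptTokens;
const totalTokens =
  deriveSessionTotalTokens({ usage, contextTokens, promptTokens }) ?? input;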