Mirror of https://github.com/openclaw/openclaw.git (synced 2026-04-18 15:17:27 +00:00)
fix(agents): stabilize overflow compaction retries and session context accounting (openclaw#14102), thanks @vpesh

Verified:
- CI checks for commit 86a7ecb45e
- Rebase conflict resolution for compatibility with latest main

Co-authored-by: vpesh <9496634+vpesh@users.noreply.github.com>
Committed by: GitHub
Parent: da55d70fb0
Commit: 957b883082
@@ -6,6 +6,7 @@ describe("isCompactionFailureError", () => {
     'Context overflow: Summarization failed: 400 {"message":"prompt is too long"}',
     "auto-compaction failed due to context overflow",
     "Compaction failed: prompt is too long",
+    "Summarization failed: context window exceeded for this request",
   ];
   for (const sample of samples) {
     expect(isCompactionFailureError(sample)).toBe(true);
@@ -30,6 +30,8 @@ describe("isLikelyContextOverflowError", () => {
     "too many requests",
     "429 Too Many Requests",
     "exceeded your current quota",
+    "This request would exceed your account's rate limit",
+    "429 Too Many Requests: request exceeds rate limit",
   ];
   for (const sample of samples) {
     expect(isLikelyContextOverflowError(sample)).toBe(false);
@@ -38,7 +38,9 @@ export function isContextOverflowError(errorMessage?: string): boolean {
 
 const CONTEXT_WINDOW_TOO_SMALL_RE = /context window.*(too small|minimum is)/i;
 const CONTEXT_OVERFLOW_HINT_RE =
-  /context.*overflow|context window.*(too (?:large|long)|exceed|over|limit|max(?:imum)?|requested|sent|tokens)|(?:prompt|request|input).*(too (?:large|long)|exceed|over|limit|max(?:imum)?)/i;
+  /context.*overflow|context window.*(too (?:large|long)|exceed|over|limit|max(?:imum)?|requested|sent|tokens)|prompt.*(too (?:large|long)|exceed|over|limit|max(?:imum)?)|(?:request|input).*(?:context|window|length|token).*(too (?:large|long)|exceed|over|limit|max(?:imum)?)/i;
+const RATE_LIMIT_HINT_RE =
+  /rate limit|too many requests|requests per (?:minute|hour|day)|quota|throttl|429\b/i;
 
 export function isLikelyContextOverflowError(errorMessage?: string): boolean {
   if (!errorMessage) {
@@ -56,6 +58,9 @@ export function isLikelyContextOverflowError(errorMessage?: string): boolean {
   if (isContextOverflowError(errorMessage)) {
     return true;
   }
+  if (RATE_LIMIT_HINT_RE.test(errorMessage)) {
+    return false;
+  }
   return CONTEXT_OVERFLOW_HINT_RE.test(errorMessage);
 }
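Taken together, the regex change and the new early return give isLikelyContextOverflowError a three-step order: hard overflow match first, then a rate-limit veto, then the looser overflow hint. Below is a minimal self-contained sketch of that order in TypeScript; the two regexes are copied from the diff, but isContextOverflowErrorStub is a simplified stand-in for the real helper, whose full body is not shown here:

// Sketch of the classification order; regexes copied from the diff above.
const CONTEXT_OVERFLOW_HINT_RE =
  /context.*overflow|context window.*(too (?:large|long)|exceed|over|limit|max(?:imum)?|requested|sent|tokens)|prompt.*(too (?:large|long)|exceed|over|limit|max(?:imum)?)|(?:request|input).*(?:context|window|length|token).*(too (?:large|long)|exceed|over|limit|max(?:imum)?)/i;
const RATE_LIMIT_HINT_RE =
  /rate limit|too many requests|requests per (?:minute|hour|day)|quota|throttl|429\b/i;

function isContextOverflowErrorStub(msg: string): boolean {
  // Simplified stand-in: the real helper matches explicit overflow payloads.
  return /context overflow:/i.test(msg);
}

function isLikelyContextOverflowError(errorMessage?: string): boolean {
  if (!errorMessage) return false;
  if (isContextOverflowErrorStub(errorMessage)) return true;
  // The rate-limit veto runs before the loose hint, so "exceeds rate limit"
  // no longer trips the overflow regex via the bare word "exceed".
  if (RATE_LIMIT_HINT_RE.test(errorMessage)) return false;
  return CONTEXT_OVERFLOW_HINT_RE.test(errorMessage);
}

console.log(isLikelyContextOverflowError("429 Too Many Requests: request exceeds rate limit")); // false
console.log(isLikelyContextOverflowError("Context window exceeded: requested 12000 tokens"));   // true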
@@ -72,9 +77,13 @@ export function isCompactionFailureError(errorMessage?: string): boolean {
   if (!hasCompactionTerm) {
     return false;
   }
-  // For compaction failures, also accept "context overflow" without colon
-  // since the error message itself describes a compaction/summarization failure
-  return isContextOverflowError(errorMessage) || lower.includes("context overflow");
+  // Treat any likely overflow shape as a compaction failure when compaction terms are present.
+  // Providers often vary wording (e.g. "context window exceeded") across APIs.
+  if (isLikelyContextOverflowError(errorMessage)) {
+    return true;
+  }
+  // Keep explicit fallback for bare "context overflow" strings.
+  return lower.includes("context overflow");
 }
 
 const ERROR_PAYLOAD_PREFIX_RE =
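The net effect: once a compaction or summarization term is present, any likely-overflow phrasing qualifies, with a bare "context overflow" substring as the final fallback. A quick spot-check reusing two samples from the updated test, assuming the helper is imported from ../pi-embedded-helpers.js:

import { isCompactionFailureError } from "../pi-embedded-helpers.js";

// Both print true: each passes the compaction/summarization term gate,
// then matches the widened likely-overflow check.
console.log(isCompactionFailureError("auto-compaction failed due to context overflow"));
console.log(isCompactionFailureError("Summarization failed: context window exceeded for this request"));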
@@ -87,7 +87,21 @@ vi.mock("../failover-error.js", () => ({
 }));
 
 vi.mock("../usage.js", () => ({
-  normalizeUsage: vi.fn(() => undefined),
+  normalizeUsage: vi.fn((usage?: unknown) =>
+    usage && typeof usage === "object" ? usage : undefined,
+  ),
+  derivePromptTokens: vi.fn(
+    (usage?: { input?: number; cacheRead?: number; cacheWrite?: number }) => {
+      if (!usage) {
+        return undefined;
+      }
+      const input = usage.input ?? 0;
+      const cacheRead = usage.cacheRead ?? 0;
+      const cacheWrite = usage.cacheWrite ?? 0;
+      const sum = input + cacheRead + cacheWrite;
+      return sum > 0 ? sum : undefined;
+    },
+  ),
   hasNonzeroUsage: vi.fn(() => false),
 }));
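The mocked derivePromptTokens encodes the contract the tests rely on: prompt size is the sum of fresh input plus cache reads and writes, and an all-zero sum yields undefined instead of a misleading 0. Worked with the numbers used later in this file:

// 900 (input) + 1_100 (cacheRead) + 0 (cacheWrite) = 2_000
derivePromptTokens({ input: 900, cacheRead: 1_100, cacheWrite: 0 }); // 2_000
derivePromptTokens({ input: 0, cacheRead: 0, cacheWrite: 0 });       // undefined, not 0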
@@ -143,6 +157,18 @@ vi.mock("../pi-embedded-helpers.js", async () => {
     const lower = msg.toLowerCase();
     return lower.includes("request_too_large") || lower.includes("request size exceeds");
   },
+  isLikelyContextOverflowError: (msg?: string) => {
+    if (!msg) {
+      return false;
+    }
+    const lower = msg.toLowerCase();
+    return (
+      lower.includes("request_too_large") ||
+      lower.includes("request size exceeds") ||
+      lower.includes("context window exceeded") ||
+      lower.includes("prompt too large")
+    );
+  },
   isFailoverAssistantError: vi.fn(() => false),
   isFailoverErrorMessage: vi.fn(() => false),
   isAuthAssistantError: vi.fn(() => false),
@@ -249,6 +275,31 @@ describe("overflow compaction in run loop", () => {
     expect(result.meta.error).toBeUndefined();
   });
 
+  it("retries after successful compaction on likely-overflow promptError variants", async () => {
+    const overflowHintError = new Error("Context window exceeded: requested 12000 tokens");
+
+    mockedRunEmbeddedAttempt
+      .mockResolvedValueOnce(makeAttemptResult({ promptError: overflowHintError }))
+      .mockResolvedValueOnce(makeAttemptResult({ promptError: null }));
+
+    mockedCompactDirect.mockResolvedValueOnce({
+      ok: true,
+      compacted: true,
+      result: {
+        summary: "Compacted session",
+        firstKeptEntryId: "entry-6",
+        tokensBefore: 140000,
+      },
+    });
+
+    const result = await runEmbeddedPiAgent(baseParams);
+
+    expect(mockedCompactDirect).toHaveBeenCalledTimes(1);
+    expect(mockedRunEmbeddedAttempt).toHaveBeenCalledTimes(2);
+    expect(log.warn).toHaveBeenCalledWith(expect.stringContaining("source=promptError"));
+    expect(result.meta.error).toBeUndefined();
+  });
+
   it("returns error if compaction fails", async () => {
     const overflowError = new Error("request_too_large: Request size exceeds model context window");
 
@@ -433,4 +484,31 @@ describe("overflow compaction in run loop", () => {
     expect(mockedCompactDirect).not.toHaveBeenCalled();
     expect(log.warn).not.toHaveBeenCalledWith(expect.stringContaining("source=assistantError"));
   });
+
+  it("sets promptTokens from the latest model call usage, not accumulated attempt usage", async () => {
+    mockedRunEmbeddedAttempt.mockResolvedValue(
+      makeAttemptResult({
+        attemptUsage: {
+          input: 4_000,
+          cacheRead: 120_000,
+          cacheWrite: 0,
+          total: 124_000,
+        },
+        lastAssistant: {
+          stopReason: "end_turn",
+          usage: {
+            input: 900,
+            cacheRead: 1_100,
+            cacheWrite: 0,
+            total: 2_000,
+          },
+        } as EmbeddedRunAttemptResult["lastAssistant"],
+      }),
+    );
+
+    const result = await runEmbeddedPiAgent(baseParams);
+
+    expect(result.meta.agentMeta?.usage?.input).toBe(4_000);
+    expect(result.meta.agentMeta?.promptTokens).toBe(2_000);
+  });
 });
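The two assertions capture the accounting fix in miniature: meta.usage.input still reports the input tokens accumulated across the whole tool loop, while promptTokens is derived from the last assistant call only.

// Worked from the fixtures above:
// accumulated attempt usage: input 4_000           -> meta.usage.input
// last call usage: 900 + 1_100 + 0 = 2_000         -> meta.promptTokens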
@@ -34,7 +34,7 @@ import {
   isAuthAssistantError,
   isBillingAssistantError,
   isCompactionFailureError,
-  isContextOverflowError,
+  isLikelyContextOverflowError,
   isFailoverAssistantError,
   isFailoverErrorMessage,
   parseImageSizeError,
@@ -44,7 +44,7 @@ import {
   pickFallbackThinkingLevel,
   type FailoverReason,
 } from "../pi-embedded-helpers.js";
-import { normalizeUsage, type UsageLike } from "../usage.js";
+import { derivePromptTokens, normalizeUsage, type UsageLike } from "../usage.js";
 import { redactRunIdentifier, resolveRunWorkspaceDir } from "../workspace-run.js";
 import { compactEmbeddedPiSessionDirect } from "./compact.js";
 import { resolveGlobalLane, resolveSessionLane } from "./lanes.js";
@@ -408,6 +408,7 @@ export async function runEmbeddedPiAgent(
   let overflowCompactionAttempts = 0;
   let toolResultTruncationAttempted = false;
   const usageAccumulator = createUsageAccumulator();
+  let lastRunPromptUsage: ReturnType<typeof normalizeUsage> | undefined;
   let autoCompactionCount = 0;
   try {
     while (true) {
@@ -475,10 +476,12 @@
       });
 
       const { aborted, promptError, timedOut, sessionIdUsed, lastAssistant } = attempt;
-      mergeUsageIntoAccumulator(
-        usageAccumulator,
-        attempt.attemptUsage ?? normalizeUsage(lastAssistant?.usage as UsageLike),
-      );
+      const lastAssistantUsage = normalizeUsage(lastAssistant?.usage as UsageLike);
+      const attemptUsage = attempt.attemptUsage ?? lastAssistantUsage;
+      mergeUsageIntoAccumulator(usageAccumulator, attemptUsage);
+      // Keep prompt size from the latest model call so session totalTokens
+      // reflects current context usage, not accumulated tool-loop usage.
+      lastRunPromptUsage = lastAssistantUsage ?? attemptUsage;
       autoCompactionCount += Math.max(0, attempt.compactionCount ?? 0);
       const formattedAssistantErrorText = lastAssistant
         ? formatAssistantErrorText(lastAssistant, {
@@ -496,14 +499,14 @@
         ? (() => {
             if (promptError) {
               const errorText = describeUnknownError(promptError);
-              if (isContextOverflowError(errorText)) {
+              if (isLikelyContextOverflowError(errorText)) {
                 return { text: errorText, source: "promptError" as const };
               }
               // Prompt submission failed with a non-overflow error. Do not
               // inspect prior assistant errors from history for this attempt.
               return null;
             }
-            if (assistantErrorText && isContextOverflowError(assistantErrorText)) {
+            if (assistantErrorText && isLikelyContextOverflowError(assistantErrorText)) {
               return { text: assistantErrorText, source: "assistantError" as const };
             }
             return null;
@@ -826,12 +829,14 @@
       // overstates the actual context size. `lastCallUsage` reflects only
       // the final call, giving an accurate snapshot of current context.
       const lastCallUsage = normalizeUsage(lastAssistant?.usage as UsageLike);
+      const promptTokens = derivePromptTokens(lastRunPromptUsage);
       const agentMeta: EmbeddedPiAgentMeta = {
         sessionId: sessionIdUsed,
         provider: lastAssistant?.provider ?? provider,
         model: lastAssistant?.model ?? model.id,
         usage,
         lastCallUsage: lastCallUsage ?? undefined,
+        promptTokens,
         compactionCount: autoCompactionCount > 0 ? autoCompactionCount : undefined,
       };
 
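For orientation, a run that compacted once and finished with the usage from the test fixtures above would surface metadata shaped roughly like this. All values are hypothetical; the field names come from the diff, and the usage sub-fields follow the test fixtures:

const exampleMeta = {
  sessionId: "sess-123",  // hypothetical
  provider: "anthropic",  // hypothetical
  model: "claude-x",      // hypothetical
  usage: { input: 4_000, cacheRead: 120_000, cacheWrite: 0, total: 124_000 }, // accumulated across attempts
  lastCallUsage: { input: 900, cacheRead: 1_100, cacheWrite: 0, total: 2_000 },
  promptTokens: 2_000,    // from the last call, not the accumulated total
  compactionCount: 1,
};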
@@ -6,6 +6,7 @@ export type EmbeddedPiAgentMeta = {
   provider: string;
   model: string;
   compactionCount?: number;
+  promptTokens?: number;
   usage?: {
     input?: number;
     output?: number;
@@ -74,4 +74,19 @@ describe("normalizeUsage", () => {
       }),
     ).toBe(1_550);
   });
+
+  it("prefers explicit prompt token overrides", () => {
+    expect(
+      deriveSessionTotalTokens({
+        usage: {
+          input: 1_200,
+          cacheRead: 300,
+          cacheWrite: 50,
+          total: 9_999,
+        },
+        promptTokens: 65_000,
+        contextTokens: 200_000,
+      }),
+    ).toBe(65_000);
+  });
 });
@@ -112,18 +112,24 @@ export function deriveSessionTotalTokens(params: {
     cacheWrite?: number;
   };
   contextTokens?: number;
+  promptTokens?: number;
 }): number | undefined {
+  const promptOverride = params.promptTokens;
+  const hasPromptOverride =
+    typeof promptOverride === "number" && Number.isFinite(promptOverride) && promptOverride > 0;
   const usage = params.usage;
-  if (!usage) {
+  if (!usage && !hasPromptOverride) {
     return undefined;
   }
-  const input = usage.input ?? 0;
-  const promptTokens = derivePromptTokens({
-    input: usage.input,
-    cacheRead: usage.cacheRead,
-    cacheWrite: usage.cacheWrite,
-  });
-  let total = promptTokens ?? usage.total ?? input;
+  const input = usage?.input ?? 0;
+  const promptTokens = hasPromptOverride
+    ? promptOverride
+    : derivePromptTokens({
+        input: usage?.input,
+        cacheRead: usage?.cacheRead,
+        cacheWrite: usage?.cacheWrite,
+      });
+  let total = promptTokens ?? usage?.total ?? input;
   if (!(total > 0)) {
     return undefined;
   }
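The precedence after this change: an explicit, finite, positive promptTokens wins; otherwise prompt tokens are derived from usage; otherwise usage.total, then input. Worked against the override test above, assuming derivePromptTokens sums input plus cache fields as the test mock earlier in the diff does:

// Derived path (no override): 1_200 + 300 + 50 = 1_550, which beats total: 9_999.
deriveSessionTotalTokens({
  usage: { input: 1_200, cacheRead: 300, cacheWrite: 50, total: 9_999 },
  contextTokens: 200_000,
}); // 1_550

// Override path: the explicit promptTokens short-circuits derivation entirely.
deriveSessionTotalTokens({
  usage: { input: 1_200, cacheRead: 300, cacheWrite: 50, total: 9_999 },
  promptTokens: 65_000,
  contextTokens: 200_000,
}); // 65_000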
@@ -371,6 +371,7 @@ export async function runReplyAgent(params: {
   }
 
   const usage = runResult.meta.agentMeta?.usage;
+  const promptTokens = runResult.meta.agentMeta?.promptTokens;
   const modelUsed = runResult.meta.agentMeta?.model ?? fallbackModel ?? defaultModel;
   const providerUsed =
     runResult.meta.agentMeta?.provider ?? fallbackProvider ?? followupRun.run.provider;
@@ -388,6 +389,7 @@
     sessionKey,
     usage,
     lastCallUsage: runResult.meta.agentMeta?.lastCallUsage,
+    promptTokens,
     modelUsed,
     providerUsed,
     contextTokensUsed,
@@ -194,6 +194,7 @@ export function createFollowupRunner(params: {
   }
 
   const usage = runResult.meta.agentMeta?.usage;
+  const promptTokens = runResult.meta.agentMeta?.promptTokens;
   const modelUsed = runResult.meta.agentMeta?.model ?? fallbackModel ?? defaultModel;
   const contextTokensUsed =
     agentCfgContextTokens ??
@@ -207,6 +208,7 @@
     sessionKey,
     usage,
     lastCallUsage: runResult.meta.agentMeta?.lastCallUsage,
+    promptTokens,
     modelUsed,
     providerUsed: fallbackProvider,
     contextTokensUsed,
@@ -25,6 +25,7 @@ export async function persistSessionUsageUpdate(params: {
   modelUsed?: string;
   providerUsed?: string;
   contextTokensUsed?: number;
+  promptTokens?: number;
   systemPromptReport?: SessionSystemPromptReport;
   cliSessionId?: string;
   logLabel?: string;
@@ -56,6 +57,7 @@
       deriveSessionTotalTokens({
         usage: usageForContext,
         contextTokens: resolvedContextTokens,
+        promptTokens: params.promptTokens,
       }) ?? input,
     modelProvider: params.providerUsed ?? entry.modelProvider,
     model: params.modelUsed ?? entry.model,
@@ -37,6 +37,7 @@ export async function updateSessionStoreAfterAgentRun(params: {
   } = params;
 
   const usage = result.meta.agentMeta?.usage;
+  const promptTokens = result.meta.agentMeta?.promptTokens;
   const compactionsThisRun = Math.max(0, result.meta.agentMeta?.compactionCount ?? 0);
   const modelUsed = result.meta.agentMeta?.model ?? fallbackModel ?? defaultModel;
   const providerUsed = result.meta.agentMeta?.provider ?? fallbackProvider ?? defaultProvider;
@@ -71,6 +72,7 @@
       deriveSessionTotalTokens({
         usage,
         contextTokens,
+        promptTokens,
       }) ?? input;
   }
   if (compactionsThisRun > 0) {
@@ -456,6 +456,7 @@ export async function runCronIsolatedAgentTurn(params: {
   // Update token+model fields in the session store.
   {
     const usage = runResult.meta.agentMeta?.usage;
+    const promptTokens = runResult.meta.agentMeta?.promptTokens;
     const modelUsed = runResult.meta.agentMeta?.model ?? fallbackModel ?? model;
     const providerUsed = runResult.meta.agentMeta?.provider ?? fallbackProvider ?? provider;
     const contextTokens =
@@ -479,6 +480,7 @@
       deriveSessionTotalTokens({
         usage,
         contextTokens,
+        promptTokens,
       }) ?? input;
   }
   await persistSessionEntry();
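Every persistence call site in this commit follows the same pattern: read promptTokens off agentMeta next to usage, hand both to deriveSessionTotalTokens along with contextTokens, and fall back to raw input when derivation yields nothing. A condensed sketch of that plumbing, with runResult and contextTokens assumed in scope as at the call sites above:

const usage = runResult.meta.agentMeta?.usage;
const promptTokens = runResult.meta.agentMeta?.promptTokens;
const input = usage?.input ?? 0;
const totalTokens =
  deriveSessionTotalTokens({ usage, contextTokens, promptTokens }) ?? input;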