mirror of
https://github.com/openclaw/openclaw.git
synced 2026-05-24 15:24:27 +00:00
fix: carry observed overflow token counts into compaction

- parse provider-reported overflow token counts from prompt errors
- pass observed live counts into context-engine compaction
- forward live counts through the legacy context-engine bridge
- classify live-over-target failures distinctly in compaction diagnostics
- cover the new parser and overflow plumbing with focused tests
This commit is contained in:
committed by
Josh Lehman
parent
f2e28fc30f
commit
26ff413914
@@ -2,6 +2,7 @@ import { describe, expect, it } from "vitest";
|
||||
import {
|
||||
classifyFailoverReason,
|
||||
classifyFailoverReasonFromHttpStatus,
|
||||
extractObservedOverflowTokenCount,
|
||||
isAuthErrorMessage,
|
||||
isAuthPermanentErrorMessage,
|
||||
isBillingErrorMessage,
|
||||
@@ -461,6 +462,29 @@ describe("isLikelyContextOverflowError", () => {
|
||||
});
|
||||
});
|
||||
|
||||
// Unit tests for extractObservedOverflowTokenCount: the parser that pulls the
// provider-reported prompt size out of a context-overflow error message.
describe("extractObservedOverflowTokenCount", () => {
  it("extracts provider-reported prompt token counts", () => {
    // "prompt is too long: <observed> tokens > <limit> maximum" wrapped in a JSON error payload.
    expect(
      extractObservedOverflowTokenCount(
        '400 {"type":"error","error":{"message":"prompt is too long: 277403 tokens > 200000 maximum"}}',
      ),
    ).toBe(277403);
    // "requested <observed> tokens"
    expect(extractObservedOverflowTokenCount("Context window exceeded: requested 12000 tokens")).toBe(
      12000,
    );
    // "resulted in <observed> tokens" — the limit (128000) earlier in the message must not be picked up.
    expect(
      extractObservedOverflowTokenCount(
        "This model's maximum context length is 128000 tokens. However, your messages resulted in 145000 tokens.",
      ),
    ).toBe(145000);
  });

  it("strips thousands separators from reported counts", () => {
    expect(
      extractObservedOverflowTokenCount("Context window exceeded: requested 1,234,567 tokens"),
    ).toBe(1234567);
    expect(
      extractObservedOverflowTokenCount("prompt is too long: 277,403 tokens > 200,000 maximum"),
    ).toBe(277403);
  });

  it("returns undefined when overflow counts are not present", () => {
    expect(extractObservedOverflowTokenCount("Prompt too large for this model")).toBeUndefined();
    expect(extractObservedOverflowTokenCount("rate limit exceeded")).toBeUndefined();
    // Missing or empty messages short-circuit before any pattern is tried.
    expect(extractObservedOverflowTokenCount(undefined)).toBeUndefined();
    expect(extractObservedOverflowTokenCount("")).toBeUndefined();
    // A non-positive count is not a usable observation.
    expect(extractObservedOverflowTokenCount("requested 0 tokens")).toBeUndefined();
  });
});
|
||||
|
||||
describe("isTransientHttpError", () => {
|
||||
it("returns true for retryable 5xx status codes", () => {
|
||||
expect(isTransientHttpError("499 Client Closed Request")).toBe(true);
|
||||
|
||||
@@ -22,6 +22,7 @@ export {
|
||||
isAuthPermanentErrorMessage,
|
||||
isModelNotFoundErrorMessage,
|
||||
isBillingAssistantError,
|
||||
extractObservedOverflowTokenCount,
|
||||
parseApiErrorInfo,
|
||||
sanitizeUserFacingText,
|
||||
isBillingErrorMessage,
|
||||
|
||||
@@ -185,6 +185,32 @@ export function isCompactionFailureError(errorMessage?: string): boolean {
|
||||
return lower.includes("context overflow");
|
||||
}
|
||||
|
||||
/**
 * Patterns that capture the token count a provider reports in a
 * context-overflow error message. Capture group 1 holds the observed count
 * and may contain comma thousands separators.
 *
 * Patterns are tried in order; the first one that yields a positive count wins.
 */
const OBSERVED_OVERFLOW_TOKEN_PATTERNS: readonly RegExp[] = [
  // e.g. "prompt is too long: 277403 tokens > 200000 maximum"
  /prompt is too long:\s*([\d,]+)\s+tokens\s*>\s*[\d,]+\s+maximum/i,
  // e.g. "Context window exceeded: requested 12000 tokens"
  /requested\s+([\d,]+)\s+tokens/i,
  // e.g. "... your messages resulted in 145000 tokens."
  /resulted in\s+([\d,]+)\s+tokens/i,
];

/**
 * Extracts the observed (live) prompt token count from a provider overflow
 * error message.
 *
 * @param errorMessage - Raw error text; may be undefined or empty.
 * @returns The positive token count captured by the first matching pattern,
 *   or `undefined` when the message is missing/empty, no pattern matches, or
 *   every captured count is non-positive or non-finite.
 */
export function extractObservedOverflowTokenCount(errorMessage?: string): number | undefined {
  if (!errorMessage) {
    return undefined;
  }

  for (const pattern of OBSERVED_OVERFLOW_TOKEN_PATTERNS) {
    // Drop thousands separators. A capture made only of commas collapses to ""
    // and is skipped by the falsy check below.
    const digits = pattern.exec(errorMessage)?.[1]?.replace(/,/g, "");
    if (!digits) {
      continue;
    }

    const parsed = Number(digits);
    // Reject 0 and values too large to represent (Number() yields Infinity).
    if (Number.isFinite(parsed) && parsed > 0) {
      return Math.floor(parsed);
    }
  }

  return undefined;
}
|
||||
|
||||
// Matches a label at the very start of a string — "error", "api error",
// "apierror", "openai error", "anthropic error" or "gateway error"
// (case-insensitive, optional whitespace inside the two-word forms) — together
// with the run of colons, whitespace and hyphens that follows it.
const ERROR_PAYLOAD_PREFIX_RE =
|
||||
/^(?:error|api\s*error|apierror|openai\s*error|anthropic\s*error|gateway\s*error)[:\s-]+/i;
|
||||
// Matches every opening or closing `<final>` tag, case-insensitively, allowing
// whitespace around the slash and the tag name.
// NOTE(review): the `g` flag makes `.test()` / `.exec()` stateful via
// `lastIndex` — confirm this is only used with replace-style calls.
const FINAL_TAG_RE = /<\s*\/?\s*final\s*>/gi;
|
||||
|
||||
@@ -114,6 +114,7 @@ export type CompactEmbeddedPiSessionParams = {
|
||||
/** Whether the sender is an owner (required for owner-only tools). */
|
||||
senderIsOwner?: boolean;
|
||||
sessionFile: string;
|
||||
currentTokenCount?: number;
|
||||
workspaceDir: string;
|
||||
agentDir?: string;
|
||||
config?: OpenClawConfig;
|
||||
@@ -228,6 +229,9 @@ function classifyCompactionReason(reason?: string): string {
|
||||
if (text.includes("already compacted")) {
|
||||
return "already_compacted_recently";
|
||||
}
|
||||
if (text.includes("still exceeds target")) {
|
||||
return "live_context_still_exceeds_target";
|
||||
}
|
||||
if (text.includes("guard")) {
|
||||
return "guard_blocked";
|
||||
}
|
||||
|
||||
@@ -109,13 +109,21 @@ vi.mock("../workspace-run.js", () => ({
|
||||
vi.mock("../pi-embedded-helpers.js", () => ({
|
||||
formatBillingErrorMessage: vi.fn(() => ""),
|
||||
classifyFailoverReason: vi.fn(() => null),
|
||||
extractObservedOverflowTokenCount: vi.fn((msg?: string) => {
|
||||
const match = msg?.match(/prompt is too long:\s*(\d+)\s+tokens/i);
|
||||
return match?.[1] ? Number(match[1]) : undefined;
|
||||
}),
|
||||
formatAssistantErrorText: vi.fn(() => ""),
|
||||
isAuthAssistantError: vi.fn(() => false),
|
||||
isBillingAssistantError: vi.fn(() => false),
|
||||
isCompactionFailureError: vi.fn(() => false),
|
||||
// Mock: report a context overflow when the message contains any known marker.
// "prompt is too long" must be part of this check so that errors of the form
// "prompt is too long: N tokens > M maximum" are routed to overflow handling.
isLikelyContextOverflowError: vi.fn((msg?: string) => {
  const lower = (msg ?? "").toLowerCase();
  return (
    lower.includes("request_too_large") ||
    lower.includes("context window exceeded") ||
    lower.includes("prompt is too long")
  );
}),
|
||||
isFailoverAssistantError: vi.fn(() => false),
|
||||
isFailoverErrorMessage: vi.fn(() => false),
|
||||
|
||||
@@ -111,6 +111,32 @@ describe("runEmbeddedPiAgent overflow compaction trigger routing", () => {
|
||||
);
|
||||
});
|
||||
|
||||
// Checks that a token count reported inside a prompt error message
// ("prompt is too long: 277403 tokens > 200000 maximum") is handed to the
// compaction call as `currentTokenCount`, and that the run then finishes
// without `meta.error`.
it("passes observed overflow token counts into compaction when providers report them", async () => {
|
||||
const overflowError = new Error(
|
||||
'400 {"type":"error","error":{"type":"invalid_request_error","message":"prompt is too long: 277403 tokens > 200000 maximum"}}',
|
||||
);
|
||||
|
||||
// First attempt resolves with the overflow prompt error; the second resolves with no prompt error.
mockedRunEmbeddedAttempt
|
||||
.mockResolvedValueOnce(makeAttemptResult({ promptError: overflowError }))
|
||||
.mockResolvedValueOnce(makeAttemptResult({ promptError: null }));
|
||||
// The single compaction call is mocked to succeed.
mockedCompactDirect.mockResolvedValueOnce(
|
||||
makeCompactionSuccess({
|
||||
summary: "Compacted session",
|
||||
|
||||
firstKeptEntryId: "entry-8",
|
||||
tokensBefore: 277403,
|
||||
}),
|
||||
);
|
||||
|
||||
const result = await runEmbeddedPiAgent(overflowBaseRunParams);
|
||||
|
||||
// The count in the compaction params must equal the number in the error message.
expect(mockedCompactDirect).toHaveBeenCalledWith(
|
||||
expect.objectContaining({
|
||||
currentTokenCount: 277403,
|
||||
}),
|
||||
);
|
||||
expect(result.meta.error).toBeUndefined();
|
||||
});
|
||||
|
||||
it("does not reset compaction attempt budget after successful tool-result truncation", async () => {
|
||||
const overflowError = queueOverflowAttemptWithOversizedToolOutput(
|
||||
mockedRunEmbeddedAttempt,
|
||||
|
||||
@@ -40,6 +40,7 @@ import { ensureOpenClawModelsJson } from "../models-config.js";
|
||||
import {
|
||||
formatBillingErrorMessage,
|
||||
classifyFailoverReason,
|
||||
extractObservedOverflowTokenCount,
|
||||
formatAssistantErrorText,
|
||||
isAuthAssistantError,
|
||||
isBillingAssistantError,
|
||||
@@ -988,11 +989,13 @@ export async function runEmbeddedPiAgent(
|
||||
const overflowDiagId = createCompactionDiagId();
|
||||
const errorText = contextOverflowError.text;
|
||||
const msgCount = attempt.messagesSnapshot?.length ?? 0;
|
||||
const observedOverflowTokens = extractObservedOverflowTokenCount(errorText);
|
||||
log.warn(
|
||||
`[context-overflow-diag] sessionKey=${params.sessionKey ?? params.sessionId} ` +
|
||||
`provider=${provider}/${modelId} source=${contextOverflowError.source} ` +
|
||||
`messages=${msgCount} sessionFile=${params.sessionFile} ` +
|
||||
`diagId=${overflowDiagId} compactionAttempts=${overflowCompactionAttempts} ` +
|
||||
`observedTokens=${observedOverflowTokens ?? "unknown"} ` +
|
||||
`error=${errorText.slice(0, 200)}`,
|
||||
);
|
||||
const isCompactionFailure = isCompactionFailureError(errorText);
|
||||
@@ -1052,6 +1055,9 @@ export async function runEmbeddedPiAgent(
|
||||
sessionId: params.sessionId,
|
||||
sessionFile: params.sessionFile,
|
||||
tokenBudget: ctxInfo.tokens,
|
||||
...(observedOverflowTokens !== undefined
|
||||
? { currentTokenCount: observedOverflowTokens }
|
||||
: {}),
|
||||
force: true,
|
||||
compactionTarget: "budget",
|
||||
runtimeContext: {
|
||||
@@ -1074,6 +1080,9 @@ export async function runEmbeddedPiAgent(
|
||||
extraSystemPrompt: params.extraSystemPrompt,
|
||||
ownerNumbers: params.ownerNumbers,
|
||||
trigger: "overflow",
|
||||
...(observedOverflowTokens !== undefined
|
||||
? { currentTokenCount: observedOverflowTokens }
|
||||
: {}),
|
||||
diagId: overflowDiagId,
|
||||
attempt: overflowCompactionAttempts,
|
||||
maxAttempts: MAX_OVERFLOW_COMPACTION_ATTEMPTS,
|
||||
|
||||
@@ -85,6 +85,7 @@ export class LegacyContextEngine implements ContextEngine {
|
||||
sessionId: params.sessionId,
|
||||
sessionFile: params.sessionFile,
|
||||
tokenBudget: params.tokenBudget,
|
||||
currentTokenCount: params.currentTokenCount,
|
||||
force: params.force,
|
||||
customInstructions: params.customInstructions,
|
||||
workspaceDir: (runtimeContext.workspaceDir as string) ?? process.cwd(),
|
||||
|
||||
Reference in New Issue
Block a user