mirror of
https://github.com/openclaw/openclaw.git
synced 2026-06-02 14:27:14 +00:00
fix: carry observed overflow token counts into compaction
- parse provider-reported overflow token counts from prompt errors
- pass observed live counts into context-engine compaction
- forward live counts through the legacy context-engine bridge
- classify live-over-target failures distinctly in compaction diagnostics
- cover the new parser and overflow plumbing with focused tests
This commit is contained in:
committed by
Josh Lehman
parent
f2e28fc30f
commit
26ff413914
@@ -2,6 +2,7 @@ import { describe, expect, it } from "vitest";
|
|||||||
import {
|
import {
|
||||||
classifyFailoverReason,
|
classifyFailoverReason,
|
||||||
classifyFailoverReasonFromHttpStatus,
|
classifyFailoverReasonFromHttpStatus,
|
||||||
|
extractObservedOverflowTokenCount,
|
||||||
isAuthErrorMessage,
|
isAuthErrorMessage,
|
||||||
isAuthPermanentErrorMessage,
|
isAuthPermanentErrorMessage,
|
||||||
isBillingErrorMessage,
|
isBillingErrorMessage,
|
||||||
@@ -461,6 +462,29 @@ describe("isLikelyContextOverflowError", () => {
|
|||||||
});
|
});
|
||||||
});
|
});
|
||||||
|
|
||||||
|
// Exercises extractObservedOverflowTokenCount against the error-message shapes
// it is expected to parse, plus messages that carry no token count at all.
describe("extractObservedOverflowTokenCount", () => {
  it("extracts provider-reported prompt token counts", () => {
    // "prompt is too long: <observed> tokens > <limit> maximum" wrapped in a JSON error payload.
    expect(
      extractObservedOverflowTokenCount(
        '400 {"type":"error","error":{"message":"prompt is too long: 277403 tokens > 200000 maximum"}}',
      ),
    ).toBe(277403);
    // "requested <observed> tokens" form.
    expect(extractObservedOverflowTokenCount("Context window exceeded: requested 12000 tokens")).toBe(
      12000,
    );
    // "resulted in <observed> tokens" form: the limit (128000) appears first but must not be returned.
    expect(
      extractObservedOverflowTokenCount(
        "This model's maximum context length is 128000 tokens. However, your messages resulted in 145000 tokens.",
      ),
    ).toBe(145000);
  });

  it("returns undefined when overflow counts are not present", () => {
    // Overflow-like wording without any number.
    expect(extractObservedOverflowTokenCount("Prompt too large for this model")).toBeUndefined();
    // Unrelated error text.
    expect(extractObservedOverflowTokenCount("rate limit exceeded")).toBeUndefined();
  });
});
|
||||||
|
|
||||||
describe("isTransientHttpError", () => {
|
describe("isTransientHttpError", () => {
|
||||||
it("returns true for retryable 5xx status codes", () => {
|
it("returns true for retryable 5xx status codes", () => {
|
||||||
expect(isTransientHttpError("499 Client Closed Request")).toBe(true);
|
expect(isTransientHttpError("499 Client Closed Request")).toBe(true);
|
||||||
|
|||||||
@@ -22,6 +22,7 @@ export {
|
|||||||
isAuthPermanentErrorMessage,
|
isAuthPermanentErrorMessage,
|
||||||
isModelNotFoundErrorMessage,
|
isModelNotFoundErrorMessage,
|
||||||
isBillingAssistantError,
|
isBillingAssistantError,
|
||||||
|
extractObservedOverflowTokenCount,
|
||||||
parseApiErrorInfo,
|
parseApiErrorInfo,
|
||||||
sanitizeUserFacingText,
|
sanitizeUserFacingText,
|
||||||
isBillingErrorMessage,
|
isBillingErrorMessage,
|
||||||
|
|||||||
@@ -185,6 +185,32 @@ export function isCompactionFailureError(errorMessage?: string): boolean {
|
|||||||
return lower.includes("context overflow");
|
return lower.includes("context overflow");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Patterns for error messages that state how many tokens the rejected request
 * contained. Capture group 1 is the observed token count, which may be written
 * with comma thousands separators. Patterns are tried in order; the first one
 * that yields a positive number wins.
 */
const OBSERVED_OVERFLOW_TOKEN_PATTERNS: readonly RegExp[] = [
  // e.g. "prompt is too long: 277403 tokens > 200000 maximum"
  /prompt is too long:\s*([\d,]+)\s+tokens\s*>\s*[\d,]+\s+maximum/i,
  // e.g. "Context window exceeded: requested 12000 tokens"
  /requested\s+([\d,]+)\s+tokens/i,
  // e.g. "However, your messages resulted in 145000 tokens."
  /resulted in\s+([\d,]+)\s+tokens/i,
];

/**
 * Extracts the token count reported inside an overflow error message.
 *
 * @param errorMessage - Raw error text; may be undefined or empty.
 * @returns The observed token count as a positive integer, or `undefined` when
 *   the message is missing or none of the known patterns yields a positive,
 *   finite number.
 */
export function extractObservedOverflowTokenCount(errorMessage?: string): number | undefined {
  if (!errorMessage) {
    return undefined;
  }

  for (const pattern of OBSERVED_OVERFLOW_TOKEN_PATTERNS) {
    // Strip thousands separators; a capture made only of commas collapses to ""
    // and is skipped so the next pattern still gets a chance.
    const digits = pattern.exec(errorMessage)?.[1]?.replace(/,/g, "");
    if (!digits) {
      continue;
    }

    const parsed = Number(digits);
    // Rejects 0 and values that overflow to Infinity.
    if (Number.isFinite(parsed) && parsed > 0) {
      return Math.floor(parsed);
    }
  }

  return undefined;
}
|
||||||
|
|
||||||
const ERROR_PAYLOAD_PREFIX_RE =
|
const ERROR_PAYLOAD_PREFIX_RE =
|
||||||
/^(?:error|api\s*error|apierror|openai\s*error|anthropic\s*error|gateway\s*error)[:\s-]+/i;
|
/^(?:error|api\s*error|apierror|openai\s*error|anthropic\s*error|gateway\s*error)[:\s-]+/i;
|
||||||
const FINAL_TAG_RE = /<\s*\/?\s*final\s*>/gi;
|
const FINAL_TAG_RE = /<\s*\/?\s*final\s*>/gi;
|
||||||
|
|||||||
@@ -114,6 +114,7 @@ export type CompactEmbeddedPiSessionParams = {
|
|||||||
/** Whether the sender is an owner (required for owner-only tools). */
|
/** Whether the sender is an owner (required for owner-only tools). */
|
||||||
senderIsOwner?: boolean;
|
senderIsOwner?: boolean;
|
||||||
sessionFile: string;
|
sessionFile: string;
|
||||||
|
currentTokenCount?: number;
|
||||||
workspaceDir: string;
|
workspaceDir: string;
|
||||||
agentDir?: string;
|
agentDir?: string;
|
||||||
config?: OpenClawConfig;
|
config?: OpenClawConfig;
|
||||||
@@ -228,6 +229,9 @@ function classifyCompactionReason(reason?: string): string {
|
|||||||
if (text.includes("already compacted")) {
|
if (text.includes("already compacted")) {
|
||||||
return "already_compacted_recently";
|
return "already_compacted_recently";
|
||||||
}
|
}
|
||||||
|
if (text.includes("still exceeds target")) {
|
||||||
|
return "live_context_still_exceeds_target";
|
||||||
|
}
|
||||||
if (text.includes("guard")) {
|
if (text.includes("guard")) {
|
||||||
return "guard_blocked";
|
return "guard_blocked";
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -109,13 +109,21 @@ vi.mock("../workspace-run.js", () => ({
|
|||||||
vi.mock("../pi-embedded-helpers.js", () => ({
|
vi.mock("../pi-embedded-helpers.js", () => ({
|
||||||
formatBillingErrorMessage: vi.fn(() => ""),
|
formatBillingErrorMessage: vi.fn(() => ""),
|
||||||
classifyFailoverReason: vi.fn(() => null),
|
classifyFailoverReason: vi.fn(() => null),
|
||||||
|
extractObservedOverflowTokenCount: vi.fn((msg?: string) => {
|
||||||
|
const match = msg?.match(/prompt is too long:\s*(\d+)\s+tokens/i);
|
||||||
|
return match?.[1] ? Number(match[1]) : undefined;
|
||||||
|
}),
|
||||||
formatAssistantErrorText: vi.fn(() => ""),
|
formatAssistantErrorText: vi.fn(() => ""),
|
||||||
isAuthAssistantError: vi.fn(() => false),
|
isAuthAssistantError: vi.fn(() => false),
|
||||||
isBillingAssistantError: vi.fn(() => false),
|
isBillingAssistantError: vi.fn(() => false),
|
||||||
isCompactionFailureError: vi.fn(() => false),
|
isCompactionFailureError: vi.fn(() => false),
|
||||||
isLikelyContextOverflowError: vi.fn((msg?: string) => {
|
isLikelyContextOverflowError: vi.fn((msg?: string) => {
|
||||||
const lower = (msg ?? "").toLowerCase();
|
const lower = (msg ?? "").toLowerCase();
|
||||||
return lower.includes("request_too_large") || lower.includes("context window exceeded");
|
return (
|
||||||
|
lower.includes("request_too_large") ||
|
||||||
|
lower.includes("context window exceeded") ||
|
||||||
|
lower.includes("prompt is too long")
|
||||||
|
);
|
||||||
}),
|
}),
|
||||||
isFailoverAssistantError: vi.fn(() => false),
|
isFailoverAssistantError: vi.fn(() => false),
|
||||||
isFailoverErrorMessage: vi.fn(() => false),
|
isFailoverErrorMessage: vi.fn(() => false),
|
||||||
|
|||||||
@@ -111,6 +111,32 @@ describe("runEmbeddedPiAgent overflow compaction trigger routing", () => {
|
|||||||
);
|
);
|
||||||
});
|
});
|
||||||
|
|
||||||
|
// Verifies that a token count parsed from the overflow error text is forwarded
// to the compaction call as `currentTokenCount`.
it("passes observed overflow token counts into compaction when providers report them", async () => {
  const overflowError = new Error(
    '400 {"type":"error","error":{"type":"invalid_request_error","message":"prompt is too long: 277403 tokens > 200000 maximum"}}',
  );

  // First attempt surfaces the overflow error; the second attempt has no prompt error.
  mockedRunEmbeddedAttempt
    .mockResolvedValueOnce(makeAttemptResult({ promptError: overflowError }))
    .mockResolvedValueOnce(makeAttemptResult({ promptError: null }));
  // The single compaction call is mocked to succeed.
  mockedCompactDirect.mockResolvedValueOnce(
    makeCompactionSuccess({
      summary: "Compacted session",
      firstKeptEntryId: "entry-8",
      tokensBefore: 277403,
    }),
  );

  const result = await runEmbeddedPiAgent(overflowBaseRunParams);

  // The count in the error message (277403) must reach the compaction params.
  expect(mockedCompactDirect).toHaveBeenCalledWith(
    expect.objectContaining({
      currentTokenCount: 277403,
    }),
  );
  expect(result.meta.error).toBeUndefined();
});
|
||||||
|
|
||||||
it("does not reset compaction attempt budget after successful tool-result truncation", async () => {
|
it("does not reset compaction attempt budget after successful tool-result truncation", async () => {
|
||||||
const overflowError = queueOverflowAttemptWithOversizedToolOutput(
|
const overflowError = queueOverflowAttemptWithOversizedToolOutput(
|
||||||
mockedRunEmbeddedAttempt,
|
mockedRunEmbeddedAttempt,
|
||||||
|
|||||||
@@ -40,6 +40,7 @@ import { ensureOpenClawModelsJson } from "../models-config.js";
|
|||||||
import {
|
import {
|
||||||
formatBillingErrorMessage,
|
formatBillingErrorMessage,
|
||||||
classifyFailoverReason,
|
classifyFailoverReason,
|
||||||
|
extractObservedOverflowTokenCount,
|
||||||
formatAssistantErrorText,
|
formatAssistantErrorText,
|
||||||
isAuthAssistantError,
|
isAuthAssistantError,
|
||||||
isBillingAssistantError,
|
isBillingAssistantError,
|
||||||
@@ -988,11 +989,13 @@ export async function runEmbeddedPiAgent(
|
|||||||
const overflowDiagId = createCompactionDiagId();
|
const overflowDiagId = createCompactionDiagId();
|
||||||
const errorText = contextOverflowError.text;
|
const errorText = contextOverflowError.text;
|
||||||
const msgCount = attempt.messagesSnapshot?.length ?? 0;
|
const msgCount = attempt.messagesSnapshot?.length ?? 0;
|
||||||
|
const observedOverflowTokens = extractObservedOverflowTokenCount(errorText);
|
||||||
log.warn(
|
log.warn(
|
||||||
`[context-overflow-diag] sessionKey=${params.sessionKey ?? params.sessionId} ` +
|
`[context-overflow-diag] sessionKey=${params.sessionKey ?? params.sessionId} ` +
|
||||||
`provider=${provider}/${modelId} source=${contextOverflowError.source} ` +
|
`provider=${provider}/${modelId} source=${contextOverflowError.source} ` +
|
||||||
`messages=${msgCount} sessionFile=${params.sessionFile} ` +
|
`messages=${msgCount} sessionFile=${params.sessionFile} ` +
|
||||||
`diagId=${overflowDiagId} compactionAttempts=${overflowCompactionAttempts} ` +
|
`diagId=${overflowDiagId} compactionAttempts=${overflowCompactionAttempts} ` +
|
||||||
|
`observedTokens=${observedOverflowTokens ?? "unknown"} ` +
|
||||||
`error=${errorText.slice(0, 200)}`,
|
`error=${errorText.slice(0, 200)}`,
|
||||||
);
|
);
|
||||||
const isCompactionFailure = isCompactionFailureError(errorText);
|
const isCompactionFailure = isCompactionFailureError(errorText);
|
||||||
@@ -1052,6 +1055,9 @@ export async function runEmbeddedPiAgent(
|
|||||||
sessionId: params.sessionId,
|
sessionId: params.sessionId,
|
||||||
sessionFile: params.sessionFile,
|
sessionFile: params.sessionFile,
|
||||||
tokenBudget: ctxInfo.tokens,
|
tokenBudget: ctxInfo.tokens,
|
||||||
|
...(observedOverflowTokens !== undefined
|
||||||
|
? { currentTokenCount: observedOverflowTokens }
|
||||||
|
: {}),
|
||||||
force: true,
|
force: true,
|
||||||
compactionTarget: "budget",
|
compactionTarget: "budget",
|
||||||
runtimeContext: {
|
runtimeContext: {
|
||||||
@@ -1074,6 +1080,9 @@ export async function runEmbeddedPiAgent(
|
|||||||
extraSystemPrompt: params.extraSystemPrompt,
|
extraSystemPrompt: params.extraSystemPrompt,
|
||||||
ownerNumbers: params.ownerNumbers,
|
ownerNumbers: params.ownerNumbers,
|
||||||
trigger: "overflow",
|
trigger: "overflow",
|
||||||
|
...(observedOverflowTokens !== undefined
|
||||||
|
? { currentTokenCount: observedOverflowTokens }
|
||||||
|
: {}),
|
||||||
diagId: overflowDiagId,
|
diagId: overflowDiagId,
|
||||||
attempt: overflowCompactionAttempts,
|
attempt: overflowCompactionAttempts,
|
||||||
maxAttempts: MAX_OVERFLOW_COMPACTION_ATTEMPTS,
|
maxAttempts: MAX_OVERFLOW_COMPACTION_ATTEMPTS,
|
||||||
|
|||||||
@@ -85,6 +85,7 @@ export class LegacyContextEngine implements ContextEngine {
|
|||||||
sessionId: params.sessionId,
|
sessionId: params.sessionId,
|
||||||
sessionFile: params.sessionFile,
|
sessionFile: params.sessionFile,
|
||||||
tokenBudget: params.tokenBudget,
|
tokenBudget: params.tokenBudget,
|
||||||
|
currentTokenCount: params.currentTokenCount,
|
||||||
force: params.force,
|
force: params.force,
|
||||||
customInstructions: params.customInstructions,
|
customInstructions: params.customInstructions,
|
||||||
workspaceDir: (runtimeContext.workspaceDir as string) ?? process.cwd(),
|
workspaceDir: (runtimeContext.workspaceDir as string) ?? process.cwd(),
|
||||||
|
|||||||
Reference in New Issue
Block a user