diff --git a/src/agents/pi-embedded-helpers.isbillingerrormessage.test.ts b/src/agents/pi-embedded-helpers.isbillingerrormessage.test.ts index 9ed183a6910..cd49ecb8be2 100644 --- a/src/agents/pi-embedded-helpers.isbillingerrormessage.test.ts +++ b/src/agents/pi-embedded-helpers.isbillingerrormessage.test.ts @@ -2,6 +2,7 @@ import { describe, expect, it } from "vitest"; import { classifyFailoverReason, classifyFailoverReasonFromHttpStatus, + extractObservedOverflowTokenCount, isAuthErrorMessage, isAuthPermanentErrorMessage, isBillingErrorMessage, @@ -461,6 +462,29 @@ describe("isLikelyContextOverflowError", () => { }); }); +describe("extractObservedOverflowTokenCount", () => { + it("extracts provider-reported prompt token counts", () => { + expect( + extractObservedOverflowTokenCount( + '400 {"type":"error","error":{"message":"prompt is too long: 277403 tokens > 200000 maximum"}}', + ), + ).toBe(277403); + expect(extractObservedOverflowTokenCount("Context window exceeded: requested 12000 tokens")).toBe( + 12000, + ); + expect( + extractObservedOverflowTokenCount( + "This model's maximum context length is 128000 tokens. However, your messages resulted in 145000 tokens.", + ), + ).toBe(145000); + }); + + it("returns undefined when overflow counts are not present", () => { + expect(extractObservedOverflowTokenCount("Prompt too large for this model")).toBeUndefined(); + expect(extractObservedOverflowTokenCount("rate limit exceeded")).toBeUndefined(); + }); +}); + describe("isTransientHttpError", () => { it("returns true for retryable 5xx status codes", () => { expect(isTransientHttpError("499 Client Closed Request")).toBe(true); diff --git a/src/agents/pi-embedded-helpers.ts b/src/agents/pi-embedded-helpers.ts index 53f21814492..77ae492bc32 100644 --- a/src/agents/pi-embedded-helpers.ts +++ b/src/agents/pi-embedded-helpers.ts @@ -22,6 +22,7 @@ export { isAuthPermanentErrorMessage, isModelNotFoundErrorMessage, isBillingAssistantError, + extractObservedOverflowTokenCount, parseApiErrorInfo, sanitizeUserFacingText, isBillingErrorMessage, diff --git a/src/agents/pi-embedded-helpers/errors.ts b/src/agents/pi-embedded-helpers/errors.ts index e9bfd92951e..28fcf328e87 100644 --- a/src/agents/pi-embedded-helpers/errors.ts +++ b/src/agents/pi-embedded-helpers/errors.ts @@ -185,6 +185,32 @@ export function isCompactionFailureError(errorMessage?: string): boolean { return lower.includes("context overflow"); } +const OBSERVED_OVERFLOW_TOKEN_PATTERNS = [ + /prompt is too long:\s*([\d,]+)\s+tokens\s*>\s*[\d,]+\s+maximum/i, + /requested\s+([\d,]+)\s+tokens/i, + /resulted in\s+([\d,]+)\s+tokens/i, +]; + +export function extractObservedOverflowTokenCount(errorMessage?: string): number | undefined { + if (!errorMessage) { + return undefined; + } + + for (const pattern of OBSERVED_OVERFLOW_TOKEN_PATTERNS) { + const match = errorMessage.match(pattern); + const rawCount = match?.[1]?.replaceAll(",", ""); + if (!rawCount) { + continue; + } + const parsed = Number(rawCount); + if (Number.isFinite(parsed) && parsed > 0) { + return Math.floor(parsed); + } + } + + return undefined; +} + const ERROR_PAYLOAD_PREFIX_RE = /^(?:error|api\s*error|apierror|openai\s*error|anthropic\s*error|gateway\s*error)[:\s-]+/i; const FINAL_TAG_RE = /<\s*\/?\s*final\s*>/gi; diff --git a/src/agents/pi-embedded-runner/compact.ts b/src/agents/pi-embedded-runner/compact.ts index feba0f81493..32d9b89c83d 100644 --- a/src/agents/pi-embedded-runner/compact.ts +++ b/src/agents/pi-embedded-runner/compact.ts @@ -114,6 +114,7 @@ export type CompactEmbeddedPiSessionParams = { /** Whether the sender is an owner (required for owner-only tools). */ senderIsOwner?: boolean; sessionFile: string; + currentTokenCount?: number; workspaceDir: string; agentDir?: string; config?: OpenClawConfig; @@ -228,6 +229,9 @@ function classifyCompactionReason(reason?: string): string { if (text.includes("already compacted")) { return "already_compacted_recently"; } + if (text.includes("still exceeds target")) { + return "live_context_still_exceeds_target"; + } if (text.includes("guard")) { return "guard_blocked"; } diff --git a/src/agents/pi-embedded-runner/run.overflow-compaction.mocks.shared.ts b/src/agents/pi-embedded-runner/run.overflow-compaction.mocks.shared.ts index 51f711508b1..646ff0a37ef 100644 --- a/src/agents/pi-embedded-runner/run.overflow-compaction.mocks.shared.ts +++ b/src/agents/pi-embedded-runner/run.overflow-compaction.mocks.shared.ts @@ -109,13 +109,21 @@ vi.mock("../workspace-run.js", () => ({ vi.mock("../pi-embedded-helpers.js", () => ({ formatBillingErrorMessage: vi.fn(() => ""), classifyFailoverReason: vi.fn(() => null), + extractObservedOverflowTokenCount: vi.fn((msg?: string) => { + const match = msg?.match(/prompt is too long:\s*(\d+)\s+tokens/i); + return match?.[1] ? Number(match[1]) : undefined; + }), formatAssistantErrorText: vi.fn(() => ""), isAuthAssistantError: vi.fn(() => false), isBillingAssistantError: vi.fn(() => false), isCompactionFailureError: vi.fn(() => false), isLikelyContextOverflowError: vi.fn((msg?: string) => { const lower = (msg ?? "").toLowerCase(); - return lower.includes("request_too_large") || lower.includes("context window exceeded"); + return ( + lower.includes("request_too_large") || + lower.includes("context window exceeded") || + lower.includes("prompt is too long") + ); }), isFailoverAssistantError: vi.fn(() => false), isFailoverErrorMessage: vi.fn(() => false), diff --git a/src/agents/pi-embedded-runner/run.overflow-compaction.test.ts b/src/agents/pi-embedded-runner/run.overflow-compaction.test.ts index b29394eedfd..b9f7707c0b6 100644 --- a/src/agents/pi-embedded-runner/run.overflow-compaction.test.ts +++ b/src/agents/pi-embedded-runner/run.overflow-compaction.test.ts @@ -111,6 +111,32 @@ describe("runEmbeddedPiAgent overflow compaction trigger routing", () => { ); }); + it("passes observed overflow token counts into compaction when providers report them", async () => { + const overflowError = new Error( + '400 {"type":"error","error":{"type":"invalid_request_error","message":"prompt is too long: 277403 tokens > 200000 maximum"}}', + ); + + mockedRunEmbeddedAttempt + .mockResolvedValueOnce(makeAttemptResult({ promptError: overflowError })) + .mockResolvedValueOnce(makeAttemptResult({ promptError: null })); + mockedCompactDirect.mockResolvedValueOnce( + makeCompactionSuccess({ + summary: "Compacted session", + firstKeptEntryId: "entry-8", + tokensBefore: 277403, + }), + ); + + const result = await runEmbeddedPiAgent(overflowBaseRunParams); + + expect(mockedCompactDirect).toHaveBeenCalledWith( + expect.objectContaining({ + currentTokenCount: 277403, + }), + ); + expect(result.meta.error).toBeUndefined(); + }); + it("does not reset compaction attempt budget after successful tool-result truncation", async () => { const overflowError = queueOverflowAttemptWithOversizedToolOutput( mockedRunEmbeddedAttempt, diff --git a/src/agents/pi-embedded-runner/run.ts b/src/agents/pi-embedded-runner/run.ts index 09d5adda724..32afe874442 100644 --- a/src/agents/pi-embedded-runner/run.ts +++ b/src/agents/pi-embedded-runner/run.ts @@ -40,6 +40,7 @@ import { ensureOpenClawModelsJson } from "../models-config.js"; import { formatBillingErrorMessage, classifyFailoverReason, + extractObservedOverflowTokenCount, formatAssistantErrorText, isAuthAssistantError, isBillingAssistantError, @@ -988,11 +989,13 @@ export async function runEmbeddedPiAgent( const overflowDiagId = createCompactionDiagId(); const errorText = contextOverflowError.text; const msgCount = attempt.messagesSnapshot?.length ?? 0; + const observedOverflowTokens = extractObservedOverflowTokenCount(errorText); log.warn( `[context-overflow-diag] sessionKey=${params.sessionKey ?? params.sessionId} ` + `provider=${provider}/${modelId} source=${contextOverflowError.source} ` + `messages=${msgCount} sessionFile=${params.sessionFile} ` + `diagId=${overflowDiagId} compactionAttempts=${overflowCompactionAttempts} ` + + `observedTokens=${observedOverflowTokens ?? "unknown"} ` + `error=${errorText.slice(0, 200)}`, ); const isCompactionFailure = isCompactionFailureError(errorText); @@ -1052,6 +1055,9 @@ export async function runEmbeddedPiAgent( sessionId: params.sessionId, sessionFile: params.sessionFile, tokenBudget: ctxInfo.tokens, + ...(observedOverflowTokens !== undefined + ? { currentTokenCount: observedOverflowTokens } + : {}), force: true, compactionTarget: "budget", runtimeContext: { @@ -1074,6 +1080,9 @@ export async function runEmbeddedPiAgent( extraSystemPrompt: params.extraSystemPrompt, ownerNumbers: params.ownerNumbers, trigger: "overflow", + ...(observedOverflowTokens !== undefined + ? { currentTokenCount: observedOverflowTokens } + : {}), diagId: overflowDiagId, attempt: overflowCompactionAttempts, maxAttempts: MAX_OVERFLOW_COMPACTION_ATTEMPTS, diff --git a/src/context-engine/legacy.ts b/src/context-engine/legacy.ts index 011022ae26a..d3068dcdfb0 100644 --- a/src/context-engine/legacy.ts +++ b/src/context-engine/legacy.ts @@ -85,6 +85,7 @@ export class LegacyContextEngine implements ContextEngine { sessionId: params.sessionId, sessionFile: params.sessionFile, tokenBudget: params.tokenBudget, + currentTokenCount: params.currentTokenCount, force: params.force, customInstructions: params.customInstructions, workspaceDir: (runtimeContext.workspaceDir as string) ?? process.cwd(),