mirror of
https://github.com/openclaw/openclaw.git
synced 2026-06-07 22:09:57 +00:00
fix: carry observed overflow token counts into compaction (#40357)
Merged via squash.
Prepared head SHA: b99eed4329
Co-authored-by: rabsef-bicrym <52549148+rabsef-bicrym@users.noreply.github.com>
Co-authored-by: jalehman <550978+jalehman@users.noreply.github.com>
Reviewed-by: @jalehman
This commit is contained in:
@@ -298,7 +298,7 @@ export function createAnthropicToolPayloadCompatibilityWrapper(
|
||||
);
|
||||
}
|
||||
}
|
||||
return originalOnPayload?.(payload, model);
|
||||
return originalOnPayload?.(payload);
|
||||
},
|
||||
});
|
||||
};
|
||||
|
||||
@@ -114,6 +114,8 @@ export type CompactEmbeddedPiSessionParams = {
|
||||
/** Whether the sender is an owner (required for owner-only tools). */
|
||||
senderIsOwner?: boolean;
|
||||
sessionFile: string;
|
||||
/** Optional caller-observed live prompt tokens used for compaction diagnostics. */
|
||||
currentTokenCount?: number;
|
||||
workspaceDir: string;
|
||||
agentDir?: string;
|
||||
config?: OpenClawConfig;
|
||||
@@ -152,6 +154,12 @@ function createCompactionDiagId(): string {
|
||||
return `cmp-${Date.now().toString(36)}-${generateSecureToken(4)}`;
|
||||
}
|
||||
|
||||
/**
 * Normalizes a caller-observed token count into a positive whole number.
 *
 * @param value - Untrusted input; anything that is not a finite number is rejected.
 * @returns The value rounded down to an integer when that integer is at least 1,
 *   otherwise `undefined`.
 */
function normalizeObservedTokenCount(value: unknown): number | undefined {
  if (typeof value !== "number" || !Number.isFinite(value)) {
    return undefined;
  }
  // Round down BEFORE the positivity check: a fractional input such as 0.5 is
  // "> 0" but floors to 0, and a reported count of 0 would be treated as a real
  // observation by `??` / `=== undefined` checks instead of falling back.
  const wholeTokens = Math.floor(value);
  return wholeTokens > 0 ? wholeTokens : undefined;
}
|
||||
|
||||
function getMessageTextChars(msg: AgentMessage): number {
|
||||
const content = (msg as { content?: unknown }).content;
|
||||
if (typeof content === "string") {
|
||||
@@ -228,6 +236,9 @@ function classifyCompactionReason(reason?: string): string {
|
||||
if (text.includes("already compacted")) {
|
||||
return "already_compacted_recently";
|
||||
}
|
||||
if (text.includes("still exceeds target")) {
|
||||
return "live_context_still_exceeds_target";
|
||||
}
|
||||
if (text.includes("guard")) {
|
||||
return "guard_blocked";
|
||||
}
|
||||
@@ -701,6 +712,7 @@ export async function compactEmbeddedPiSessionDirect(
|
||||
const missingSessionKey = !params.sessionKey || !params.sessionKey.trim();
|
||||
const hookSessionKey = params.sessionKey?.trim() || params.sessionId;
|
||||
const hookRunner = getGlobalHookRunner();
|
||||
const observedTokenCount = normalizeObservedTokenCount(params.currentTokenCount);
|
||||
const messageCountOriginal = originalMessages.length;
|
||||
let tokenCountOriginal: number | undefined;
|
||||
try {
|
||||
@@ -712,14 +724,16 @@ export async function compactEmbeddedPiSessionDirect(
|
||||
tokenCountOriginal = undefined;
|
||||
}
|
||||
const messageCountBefore = session.messages.length;
|
||||
let tokenCountBefore: number | undefined;
|
||||
try {
|
||||
tokenCountBefore = 0;
|
||||
for (const message of session.messages) {
|
||||
tokenCountBefore += estimateTokens(message);
|
||||
let tokenCountBefore = observedTokenCount;
|
||||
if (tokenCountBefore === undefined) {
|
||||
try {
|
||||
tokenCountBefore = 0;
|
||||
for (const message of session.messages) {
|
||||
tokenCountBefore += estimateTokens(message);
|
||||
}
|
||||
} catch {
|
||||
tokenCountBefore = undefined;
|
||||
}
|
||||
} catch {
|
||||
tokenCountBefore = undefined;
|
||||
}
|
||||
// TODO(#7175): Consider exposing full message snapshots or pre-compaction injection
|
||||
// hooks; current events only report counts/metadata.
|
||||
@@ -802,7 +816,7 @@ export async function compactEmbeddedPiSessionDirect(
|
||||
tokensAfter += estimateTokens(message);
|
||||
}
|
||||
// Sanity check: tokensAfter should be less than tokensBefore
|
||||
if (tokensAfter > result.tokensBefore) {
|
||||
if (tokensAfter > (observedTokenCount ?? result.tokensBefore)) {
|
||||
tokensAfter = undefined; // Don't trust the estimate
|
||||
}
|
||||
} catch {
|
||||
@@ -876,7 +890,7 @@ export async function compactEmbeddedPiSessionDirect(
|
||||
result: {
|
||||
summary: result.summary,
|
||||
firstKeptEntryId: result.firstKeptEntryId,
|
||||
tokensBefore: result.tokensBefore,
|
||||
tokensBefore: observedTokenCount ?? result.tokensBefore,
|
||||
tokensAfter,
|
||||
details: result.details,
|
||||
},
|
||||
@@ -977,6 +991,7 @@ export async function compactEmbeddedPiSession(
|
||||
sessionId: params.sessionId,
|
||||
sessionFile: params.sessionFile,
|
||||
tokenBudget: ceCtxInfo.tokens,
|
||||
currentTokenCount: params.currentTokenCount,
|
||||
customInstructions: params.customInstructions,
|
||||
force: params.trigger === "manual",
|
||||
runtimeContext: params as Record<string, unknown>,
|
||||
|
||||
@@ -19,7 +19,7 @@ function applyAndCapture(params: {
|
||||
|
||||
const baseStreamFn: StreamFn = (_model, _context, options) => {
|
||||
captured.headers = options?.headers;
|
||||
options?.onPayload?.({}, _model);
|
||||
options?.onPayload?.({});
|
||||
return createAssistantMessageEventStream();
|
||||
};
|
||||
const agent = { streamFn: baseStreamFn };
|
||||
@@ -97,7 +97,7 @@ describe("extra-params: Kilocode kilo/auto reasoning", () => {
|
||||
|
||||
const baseStreamFn: StreamFn = (_model, _context, options) => {
|
||||
const payload: Record<string, unknown> = { reasoning_effort: "high" };
|
||||
options?.onPayload?.(payload, _model);
|
||||
options?.onPayload?.(payload);
|
||||
capturedPayload = payload;
|
||||
return createAssistantMessageEventStream();
|
||||
};
|
||||
@@ -125,7 +125,7 @@ describe("extra-params: Kilocode kilo/auto reasoning", () => {
|
||||
|
||||
const baseStreamFn: StreamFn = (_model, _context, options) => {
|
||||
const payload: Record<string, unknown> = {};
|
||||
options?.onPayload?.(payload, _model);
|
||||
options?.onPayload?.(payload);
|
||||
capturedPayload = payload;
|
||||
return createAssistantMessageEventStream();
|
||||
};
|
||||
@@ -158,7 +158,7 @@ describe("extra-params: Kilocode kilo/auto reasoning", () => {
|
||||
|
||||
const baseStreamFn: StreamFn = (_model, _context, options) => {
|
||||
const payload: Record<string, unknown> = { reasoning_effort: "high" };
|
||||
options?.onPayload?.(payload, _model);
|
||||
options?.onPayload?.(payload);
|
||||
capturedPayload = payload;
|
||||
return createAssistantMessageEventStream();
|
||||
};
|
||||
|
||||
@@ -13,7 +13,7 @@ type StreamPayload = {
|
||||
|
||||
function runOpenRouterPayload(payload: StreamPayload, modelId: string) {
|
||||
const baseStreamFn: StreamFn = (_model, _context, options) => {
|
||||
options?.onPayload?.(payload, _model);
|
||||
options?.onPayload?.(payload);
|
||||
return createAssistantMessageEventStream();
|
||||
};
|
||||
const agent = { streamFn: baseStreamFn };
|
||||
|
||||
@@ -230,7 +230,7 @@ function createGoogleThinkingPayloadWrapper(
|
||||
thinkingLevel,
|
||||
});
|
||||
}
|
||||
return onPayload?.(payload, model);
|
||||
return onPayload?.(payload);
|
||||
},
|
||||
});
|
||||
};
|
||||
@@ -263,7 +263,7 @@ function createZaiToolStreamWrapper(
|
||||
// Inject tool_stream: true for Z.AI API
|
||||
(payload as Record<string, unknown>).tool_stream = true;
|
||||
}
|
||||
return originalOnPayload?.(payload, model);
|
||||
return originalOnPayload?.(payload);
|
||||
},
|
||||
});
|
||||
};
|
||||
@@ -310,7 +310,7 @@ function createParallelToolCallsWrapper(
|
||||
if (payload && typeof payload === "object") {
|
||||
(payload as Record<string, unknown>).parallel_tool_calls = enabled;
|
||||
}
|
||||
return originalOnPayload?.(payload, model);
|
||||
return originalOnPayload?.(payload);
|
||||
},
|
||||
});
|
||||
};
|
||||
|
||||
@@ -22,7 +22,7 @@ type ToolStreamCase = {
|
||||
function runToolStreamCase(params: ToolStreamCase) {
|
||||
const payload: Record<string, unknown> = { model: params.model.id, messages: [] };
|
||||
const baseStreamFn: StreamFn = (model, _context, options) => {
|
||||
options?.onPayload?.(payload, model);
|
||||
options?.onPayload?.(payload);
|
||||
return {} as ReturnType<StreamFn>;
|
||||
};
|
||||
const agent = { streamFn: baseStreamFn };
|
||||
|
||||
@@ -60,7 +60,7 @@ export function createSiliconFlowThinkingWrapper(baseStreamFn: StreamFn | undefi
|
||||
payloadObj.thinking = null;
|
||||
}
|
||||
}
|
||||
return originalOnPayload?.(payload, model);
|
||||
return originalOnPayload?.(payload);
|
||||
},
|
||||
});
|
||||
};
|
||||
@@ -106,7 +106,7 @@ export function createMoonshotThinkingWrapper(
|
||||
payloadObj.tool_choice = "auto";
|
||||
}
|
||||
}
|
||||
return originalOnPayload?.(payload, model);
|
||||
return originalOnPayload?.(payload);
|
||||
},
|
||||
});
|
||||
};
|
||||
|
||||
@@ -197,7 +197,7 @@ export function createOpenAIResponsesContextManagementWrapper(
|
||||
compactThreshold,
|
||||
});
|
||||
}
|
||||
return originalOnPayload?.(payload, model);
|
||||
return originalOnPayload?.(payload);
|
||||
},
|
||||
});
|
||||
};
|
||||
@@ -226,7 +226,7 @@ export function createOpenAIServiceTierWrapper(
|
||||
payloadObj.service_tier = serviceTier;
|
||||
}
|
||||
}
|
||||
return originalOnPayload?.(payload, model);
|
||||
return originalOnPayload?.(payload);
|
||||
},
|
||||
});
|
||||
};
|
||||
|
||||
@@ -92,7 +92,7 @@ export function createOpenRouterSystemCacheWrapper(baseStreamFn: StreamFn | unde
|
||||
}
|
||||
}
|
||||
}
|
||||
return originalOnPayload?.(payload, model);
|
||||
return originalOnPayload?.(payload);
|
||||
},
|
||||
});
|
||||
};
|
||||
@@ -113,7 +113,7 @@ export function createOpenRouterWrapper(
|
||||
},
|
||||
onPayload: (payload) => {
|
||||
normalizeProxyReasoningPayload(payload, thinkingLevel);
|
||||
return onPayload?.(payload, model);
|
||||
return onPayload?.(payload);
|
||||
},
|
||||
});
|
||||
};
|
||||
@@ -138,7 +138,7 @@ export function createKilocodeWrapper(
|
||||
},
|
||||
onPayload: (payload) => {
|
||||
normalizeProxyReasoningPayload(payload, thinkingLevel);
|
||||
return onPayload?.(payload, model);
|
||||
return onPayload?.(payload);
|
||||
},
|
||||
});
|
||||
};
|
||||
|
||||
@@ -109,13 +109,21 @@ vi.mock("../workspace-run.js", () => ({
|
||||
vi.mock("../pi-embedded-helpers.js", () => ({
|
||||
formatBillingErrorMessage: vi.fn(() => ""),
|
||||
classifyFailoverReason: vi.fn(() => null),
|
||||
extractObservedOverflowTokenCount: vi.fn((msg?: string) => {
|
||||
const match = msg?.match(/prompt is too long:\s*([\d,]+)\s+tokens\s*>\s*[\d,]+\s+maximum/i);
|
||||
return match?.[1] ? Number(match[1].replaceAll(",", "")) : undefined;
|
||||
}),
|
||||
formatAssistantErrorText: vi.fn(() => ""),
|
||||
isAuthAssistantError: vi.fn(() => false),
|
||||
isBillingAssistantError: vi.fn(() => false),
|
||||
isCompactionFailureError: vi.fn(() => false),
|
||||
isLikelyContextOverflowError: vi.fn((msg?: string) => {
|
||||
const lower = (msg ?? "").toLowerCase();
|
||||
return lower.includes("request_too_large") || lower.includes("context window exceeded");
|
||||
return (
|
||||
lower.includes("request_too_large") ||
|
||||
lower.includes("context window exceeded") ||
|
||||
lower.includes("prompt is too long")
|
||||
);
|
||||
}),
|
||||
isFailoverAssistantError: vi.fn(() => false),
|
||||
isFailoverErrorMessage: vi.fn(() => false),
|
||||
|
||||
@@ -111,6 +111,32 @@ describe("runEmbeddedPiAgent overflow compaction trigger routing", () => {
|
||||
);
|
||||
});
|
||||
|
||||
it("passes observed overflow token counts into compaction when providers report them", async () => {
|
||||
const overflowError = new Error(
|
||||
'400 {"type":"error","error":{"type":"invalid_request_error","message":"prompt is too long: 277403 tokens > 200000 maximum"}}',
|
||||
);
|
||||
|
||||
mockedRunEmbeddedAttempt
|
||||
.mockResolvedValueOnce(makeAttemptResult({ promptError: overflowError }))
|
||||
.mockResolvedValueOnce(makeAttemptResult({ promptError: null }));
|
||||
mockedCompactDirect.mockResolvedValueOnce(
|
||||
makeCompactionSuccess({
|
||||
summary: "Compacted session",
|
||||
firstKeptEntryId: "entry-8",
|
||||
tokensBefore: 277403,
|
||||
}),
|
||||
);
|
||||
|
||||
const result = await runEmbeddedPiAgent(overflowBaseRunParams);
|
||||
|
||||
expect(mockedCompactDirect).toHaveBeenCalledWith(
|
||||
expect.objectContaining({
|
||||
currentTokenCount: 277403,
|
||||
}),
|
||||
);
|
||||
expect(result.meta.error).toBeUndefined();
|
||||
});
|
||||
|
||||
it("does not reset compaction attempt budget after successful tool-result truncation", async () => {
|
||||
const overflowError = queueOverflowAttemptWithOversizedToolOutput(
|
||||
mockedRunEmbeddedAttempt,
|
||||
|
||||
@@ -40,6 +40,7 @@ import { ensureOpenClawModelsJson } from "../models-config.js";
|
||||
import {
|
||||
formatBillingErrorMessage,
|
||||
classifyFailoverReason,
|
||||
extractObservedOverflowTokenCount,
|
||||
formatAssistantErrorText,
|
||||
isAuthAssistantError,
|
||||
isBillingAssistantError,
|
||||
@@ -988,11 +989,13 @@ export async function runEmbeddedPiAgent(
|
||||
const overflowDiagId = createCompactionDiagId();
|
||||
const errorText = contextOverflowError.text;
|
||||
const msgCount = attempt.messagesSnapshot?.length ?? 0;
|
||||
const observedOverflowTokens = extractObservedOverflowTokenCount(errorText);
|
||||
log.warn(
|
||||
`[context-overflow-diag] sessionKey=${params.sessionKey ?? params.sessionId} ` +
|
||||
`provider=${provider}/${modelId} source=${contextOverflowError.source} ` +
|
||||
`messages=${msgCount} sessionFile=${params.sessionFile} ` +
|
||||
`diagId=${overflowDiagId} compactionAttempts=${overflowCompactionAttempts} ` +
|
||||
`observedTokens=${observedOverflowTokens ?? "unknown"} ` +
|
||||
`error=${errorText.slice(0, 200)}`,
|
||||
);
|
||||
const isCompactionFailure = isCompactionFailureError(errorText);
|
||||
@@ -1052,6 +1055,9 @@ export async function runEmbeddedPiAgent(
|
||||
sessionId: params.sessionId,
|
||||
sessionFile: params.sessionFile,
|
||||
tokenBudget: ctxInfo.tokens,
|
||||
...(observedOverflowTokens !== undefined
|
||||
? { currentTokenCount: observedOverflowTokens }
|
||||
: {}),
|
||||
force: true,
|
||||
compactionTarget: "budget",
|
||||
runtimeContext: {
|
||||
@@ -1074,6 +1080,9 @@ export async function runEmbeddedPiAgent(
|
||||
extraSystemPrompt: params.extraSystemPrompt,
|
||||
ownerNumbers: params.ownerNumbers,
|
||||
trigger: "overflow",
|
||||
...(observedOverflowTokens !== undefined
|
||||
? { currentTokenCount: observedOverflowTokens }
|
||||
: {}),
|
||||
diagId: overflowDiagId,
|
||||
attempt: overflowCompactionAttempts,
|
||||
maxAttempts: MAX_OVERFLOW_COMPACTION_ATTEMPTS,
|
||||
|
||||
@@ -233,14 +233,14 @@ export function wrapOllamaCompatNumCtx(baseFn: StreamFn | undefined, numCtx: num
|
||||
...options,
|
||||
onPayload: (payload: unknown) => {
|
||||
if (!payload || typeof payload !== "object") {
|
||||
return options?.onPayload?.(payload, model);
|
||||
return options?.onPayload?.(payload);
|
||||
}
|
||||
const payloadRecord = payload as Record<string, unknown>;
|
||||
if (!payloadRecord.options || typeof payloadRecord.options !== "object") {
|
||||
payloadRecord.options = {};
|
||||
}
|
||||
(payloadRecord.options as Record<string, unknown>).num_ctx = numCtx;
|
||||
return options?.onPayload?.(payload, model);
|
||||
return options?.onPayload?.(payload);
|
||||
},
|
||||
});
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user