mirror of
https://github.com/openclaw/openclaw.git
synced 2026-05-09 18:14:31 +00:00
* fix(cache): inject cache_control into system prompt for OpenRouter Anthropic
Add onPayload wrapper that injects cache_control: { type: "ephemeral" }
into the system/developer message content for OpenRouter requests routed
to Anthropic models. The system prompt is typically ~18k tokens and was
being re-processed on every request without caching.
Fixes #15151
* Changelog: add OpenRouter note for #17473
---------
Co-authored-by: Vincent Koc <vincentkoc@ieee.org>
This commit is contained in:
committed by
GitHub
parent
66529c7aa5
commit
c52b2ad5c3
@@ -290,6 +290,59 @@ function createAnthropicBetaHeadersWrapper(
|
||||
};
|
||||
}
|
||||
|
||||
function isOpenRouterAnthropicModel(provider: string, modelId: string): boolean {
|
||||
return provider.toLowerCase() === "openrouter" && modelId.toLowerCase().startsWith("anthropic/");
|
||||
}
|
||||
|
||||
type PayloadMessage = {
|
||||
role?: string;
|
||||
content?: unknown;
|
||||
};
|
||||
|
||||
/**
|
||||
* Inject cache_control into the system message for OpenRouter Anthropic models.
|
||||
* OpenRouter passes through Anthropic's cache_control field — caching the system
|
||||
* prompt avoids re-processing it on every request.
|
||||
*/
|
||||
function createOpenRouterSystemCacheWrapper(baseStreamFn: StreamFn | undefined): StreamFn {
|
||||
const underlying = baseStreamFn ?? streamSimple;
|
||||
return (model, context, options) => {
|
||||
if (
|
||||
typeof model.provider !== "string" ||
|
||||
typeof model.id !== "string" ||
|
||||
!isOpenRouterAnthropicModel(model.provider, model.id)
|
||||
) {
|
||||
return underlying(model, context, options);
|
||||
}
|
||||
|
||||
const originalOnPayload = options?.onPayload;
|
||||
return underlying(model, context, {
|
||||
...options,
|
||||
onPayload: (payload) => {
|
||||
const messages = (payload as Record<string, unknown>)?.messages;
|
||||
if (Array.isArray(messages)) {
|
||||
for (const msg of messages as PayloadMessage[]) {
|
||||
if (msg.role !== "system" && msg.role !== "developer") {
|
||||
continue;
|
||||
}
|
||||
if (typeof msg.content === "string") {
|
||||
msg.content = [
|
||||
{ type: "text", text: msg.content, cache_control: { type: "ephemeral" } },
|
||||
];
|
||||
} else if (Array.isArray(msg.content) && msg.content.length > 0) {
|
||||
const last = msg.content[msg.content.length - 1];
|
||||
if (last && typeof last === "object") {
|
||||
(last as Record<string, unknown>).cache_control = { type: "ephemeral" };
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
originalOnPayload?.(payload);
|
||||
},
|
||||
});
|
||||
};
|
||||
}
|
||||
|
||||
/**
|
||||
* Map OpenClaw's ThinkLevel to OpenRouter's reasoning.effort values.
|
||||
* "off" maps to "none"; all other levels pass through as-is.
|
||||
@@ -426,6 +479,7 @@ export function applyExtraParamsToAgent(
|
||||
if (provider === "openrouter") {
|
||||
log.debug(`applying OpenRouter app attribution headers for ${provider}/${modelId}`);
|
||||
agent.streamFn = createOpenRouterWrapper(agent.streamFn, thinkingLevel);
|
||||
agent.streamFn = createOpenRouterSystemCacheWrapper(agent.streamFn);
|
||||
}
|
||||
|
||||
// Enable Z.AI tool_stream for real-time tool call streaming.
|
||||
|
||||
Reference in New Issue
Block a user