import type { StreamFn } from "@mariozechner/pi-agent-core";
import type { SimpleStreamOptions } from "@mariozechner/pi-ai";
import { streamSimple } from "@mariozechner/pi-ai";
import type { ThinkLevel } from "../../auto-reply/thinking.js";
import type { OpenClawConfig } from "../../config/config.js";
import { log } from "./logger.js";

const OPENROUTER_APP_HEADERS: Record<string, string> = {
  "HTTP-Referer": "https://openclaw.ai",
  "X-Title": "OpenClaw",
};
const KILOCODE_FEATURE_HEADER = "X-KILOCODE-FEATURE";
const KILOCODE_FEATURE_DEFAULT = "openclaw";
const KILOCODE_FEATURE_ENV_VAR = "KILOCODE_FEATURE";

function resolveKilocodeAppHeaders(): Record<string, string> {
  const feature = process.env[KILOCODE_FEATURE_ENV_VAR]?.trim() || KILOCODE_FEATURE_DEFAULT;
  return { [KILOCODE_FEATURE_HEADER]: feature };
}

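// Illustrative behavior (not part of the module): with the KILOCODE_FEATURE env
// var unset, resolveKilocodeAppHeaders() yields { "X-KILOCODE-FEATURE": "openclaw" };
// setting KILOCODE_FEATURE=my-feature swaps in that value instead.
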
const ANTHROPIC_CONTEXT_1M_BETA = "context-1m-2025-08-07";
const ANTHROPIC_1M_MODEL_PREFIXES = ["claude-opus-4", "claude-sonnet-4"] as const;
// NOTE: We only force `store=true` for *direct* OpenAI Responses.
// Codex responses (chatgpt.com/backend-api/codex/responses) require `store=false`.
const OPENAI_RESPONSES_APIS = new Set(["openai-responses"]);
const OPENAI_RESPONSES_PROVIDERS = new Set(["openai", "azure-openai-responses"]);

/**
 * Resolve provider-specific extra params from model config.
 * Used to pass through stream params like temperature/maxTokens.
 *
 * @internal Exported for testing only
 */
export function resolveExtraParams(params: {
  cfg: OpenClawConfig | undefined;
  provider: string;
  modelId: string;
  agentId?: string;
}): Record<string, unknown> | undefined {
  const modelKey = `${params.provider}/${params.modelId}`;
  const modelConfig = params.cfg?.agents?.defaults?.models?.[modelKey];
  const globalParams = modelConfig?.params ? { ...modelConfig.params } : undefined;
  const agentParams =
    params.agentId && params.cfg?.agents?.list
      ? params.cfg.agents.list.find((agent) => agent.id === params.agentId)?.params
      : undefined;

  if (!globalParams && !agentParams) {
    return undefined;
  }

  const merged = Object.assign({}, globalParams, agentParams);
  const resolvedParallelToolCalls = resolveAliasedParamValue(
    [globalParams, agentParams],
    "parallel_tool_calls",
    "parallelToolCalls",
  );
  if (resolvedParallelToolCalls !== undefined) {
    merged.parallel_tool_calls = resolvedParallelToolCalls;
    delete merged.parallelToolCalls;
  }

  return merged;
}

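// Illustrative precedence sketch (hypothetical config values): if the model-level
// entry sets { temperature: 0.2, parallelToolCalls: true } and the matching agent
// entry sets { temperature: 0.7 }, then
//   resolveExtraParams({ cfg, provider, modelId, agentId })
//   // => { temperature: 0.7, parallel_tool_calls: true }
// Agent params win per key, and the camelCase alias is canonicalized to
// parallel_tool_calls with the alias key dropped.
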
type CacheRetention = "none" | "short" | "long";
type OpenAIServiceTier = "auto" | "default" | "flex" | "priority";
type CacheRetentionStreamOptions = Partial<SimpleStreamOptions> & {
  cacheRetention?: CacheRetention;
  openaiWsWarmup?: boolean;
};

/**
 * Resolve cacheRetention from extraParams, supporting both the new `cacheRetention`
 * and legacy `cacheControlTtl` values for backwards compatibility.
 *
 * Mapping: "5m" → "short", "1h" → "long"
 *
 * Applies to:
 * - the direct Anthropic provider
 * - Anthropic Claude models on Bedrock when cache retention is explicitly configured
 *
 * OpenRouter uses the openai-completions API with hardcoded cache_control instead
 * of the cacheRetention stream option.
 *
 * Defaults to "short" for direct Anthropic when not explicitly configured.
 */
function resolveCacheRetention(
  extraParams: Record<string, unknown> | undefined,
  provider: string,
): CacheRetention | undefined {
  const isAnthropicDirect = provider === "anthropic";
  const hasBedrockOverride =
    extraParams?.cacheRetention !== undefined || extraParams?.cacheControlTtl !== undefined;
  const isAnthropicBedrock = provider === "amazon-bedrock" && hasBedrockOverride;

  if (!isAnthropicDirect && !isAnthropicBedrock) {
    return undefined;
  }

  // Prefer the new cacheRetention if present
  const newVal = extraParams?.cacheRetention;
  if (newVal === "none" || newVal === "short" || newVal === "long") {
    return newVal;
  }

  // Fall back to legacy cacheControlTtl with mapping
  const legacy = extraParams?.cacheControlTtl;
  if (legacy === "5m") {
    return "short";
  }
  if (legacy === "1h") {
    return "long";
  }

  // Default to "short" only for direct Anthropic when not explicitly configured.
  // Bedrock retains upstream provider defaults unless explicitly set.
  if (!isAnthropicDirect) {
    return undefined;
  }

  return "short";
}

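// Illustrative mappings (assuming the providers shown):
//   resolveCacheRetention({ cacheControlTtl: "1h" }, "anthropic")      // => "long"
//   resolveCacheRetention(undefined, "anthropic")                      // => "short" (default)
//   resolveCacheRetention({ cacheControlTtl: "5m" }, "amazon-bedrock") // => "short"
//   resolveCacheRetention(undefined, "amazon-bedrock")                 // => undefined (no override)
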
function createStreamFnWithExtraParams(
  baseStreamFn: StreamFn | undefined,
  extraParams: Record<string, unknown> | undefined,
  provider: string,
): StreamFn | undefined {
  if (!extraParams || Object.keys(extraParams).length === 0) {
    return undefined;
  }

  const streamParams: CacheRetentionStreamOptions = {};
  if (typeof extraParams.temperature === "number") {
    streamParams.temperature = extraParams.temperature;
  }
  if (typeof extraParams.maxTokens === "number") {
    streamParams.maxTokens = extraParams.maxTokens;
  }
  const transport = extraParams.transport;
  if (transport === "sse" || transport === "websocket" || transport === "auto") {
    streamParams.transport = transport;
  } else if (transport != null) {
    const transportSummary = typeof transport === "string" ? transport : typeof transport;
    log.warn(`ignoring invalid transport param: ${transportSummary}`);
  }
  if (typeof extraParams.openaiWsWarmup === "boolean") {
    streamParams.openaiWsWarmup = extraParams.openaiWsWarmup;
  }
  const cacheRetention = resolveCacheRetention(extraParams, provider);
  if (cacheRetention) {
    streamParams.cacheRetention = cacheRetention;
  }

  // Extract OpenRouter provider routing preferences from extraParams.provider.
  // Injected into model.compat.openRouterRouting so pi-ai's buildParams sets
  // params.provider in the API request body (openai-completions.js L359-362).
  // pi-ai's OpenRouterRouting type only declares { only?, order? }, but at
  // runtime the full object is forwarded — enabling allow_fallbacks,
  // data_collection, ignore, sort, quantizations, etc.
  const providerRouting =
    provider === "openrouter" &&
    extraParams.provider != null &&
    typeof extraParams.provider === "object"
      ? (extraParams.provider as Record<string, unknown>)
      : undefined;

  if (Object.keys(streamParams).length === 0 && !providerRouting) {
    return undefined;
  }

  log.debug(`creating streamFn wrapper with params: ${JSON.stringify(streamParams)}`);
  if (providerRouting) {
    log.debug(`OpenRouter provider routing: ${JSON.stringify(providerRouting)}`);
  }

  const underlying = baseStreamFn ?? streamSimple;
  const wrappedStreamFn: StreamFn = (model, context, options) => {
    // When provider routing is configured, inject it into model.compat so
    // pi-ai picks it up via model.compat.openRouterRouting.
    const effectiveModel = providerRouting
      ? ({
          ...model,
          compat: { ...model.compat, openRouterRouting: providerRouting },
        } as unknown as typeof model)
      : model;
    return underlying(effectiveModel, context, {
      ...streamParams,
      ...options,
    });
  };

  return wrappedStreamFn;
}

function isAnthropicBedrockModel(modelId: string): boolean {
  const normalized = modelId.toLowerCase();
  return normalized.includes("anthropic.claude") || normalized.includes("anthropic/claude");
}

function createBedrockNoCacheWrapper(baseStreamFn: StreamFn | undefined): StreamFn {
  const underlying = baseStreamFn ?? streamSimple;
  return (model, context, options) =>
    underlying(model, context, {
      ...options,
      cacheRetention: "none",
    });
}

function isDirectOpenAIBaseUrl(baseUrl: unknown): boolean {
  if (typeof baseUrl !== "string" || !baseUrl.trim()) {
    return false;
  }

  try {
    const host = new URL(baseUrl).hostname.toLowerCase();
    return (
      host === "api.openai.com" || host === "chatgpt.com" || host.endsWith(".openai.azure.com")
    );
  } catch {
    const normalized = baseUrl.toLowerCase();
    return (
      normalized.includes("api.openai.com") ||
      normalized.includes("chatgpt.com") ||
      normalized.includes(".openai.azure.com")
    );
  }
}

function isOpenAIPublicApiBaseUrl(baseUrl: unknown): boolean {
  if (typeof baseUrl !== "string" || !baseUrl.trim()) {
    return false;
  }

  try {
    return new URL(baseUrl).hostname.toLowerCase() === "api.openai.com";
  } catch {
    return baseUrl.toLowerCase().includes("api.openai.com");
  }
}

function shouldForceResponsesStore(model: {
  api?: unknown;
  provider?: unknown;
  baseUrl?: unknown;
  compat?: { supportsStore?: boolean };
}): boolean {
  // Never force store=true when the model explicitly declares supportsStore=false
  // (e.g. Azure OpenAI Responses API without server-side persistence).
  if (model.compat?.supportsStore === false) {
    return false;
  }
  if (typeof model.api !== "string" || typeof model.provider !== "string") {
    return false;
  }
  if (!OPENAI_RESPONSES_APIS.has(model.api)) {
    return false;
  }
  if (!OPENAI_RESPONSES_PROVIDERS.has(model.provider)) {
    return false;
  }
  return isDirectOpenAIBaseUrl(model.baseUrl);
}

function parsePositiveInteger(value: unknown): number | undefined {
  if (typeof value === "number" && Number.isFinite(value) && value > 0) {
    return Math.floor(value);
  }
  if (typeof value === "string") {
    const parsed = Number.parseInt(value, 10);
    if (Number.isFinite(parsed) && parsed > 0) {
      return parsed;
    }
  }
  return undefined;
}

function resolveOpenAIResponsesCompactThreshold(model: { contextWindow?: unknown }): number {
  const contextWindow = parsePositiveInteger(model.contextWindow);
  if (contextWindow) {
    return Math.max(1_000, Math.floor(contextWindow * 0.7));
  }
  return 80_000;
}

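// Worked example: contextWindow=200_000 gives max(1_000, floor(200_000 * 0.7)) = 140_000;
// a missing or non-positive contextWindow falls back to the flat 80_000 threshold.
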
function shouldEnableOpenAIResponsesServerCompaction(
  model: {
    api?: unknown;
    provider?: unknown;
    baseUrl?: unknown;
    compat?: { supportsStore?: boolean };
  },
  extraParams: Record<string, unknown> | undefined,
): boolean {
  const configured = extraParams?.responsesServerCompaction;
  if (configured === false) {
    return false;
  }
  if (!shouldForceResponsesStore(model)) {
    return false;
  }
  if (configured === true) {
    return true;
  }
  // Auto-enable for direct OpenAI Responses models.
  return model.provider === "openai";
}

function shouldStripResponsesStore(
  model: { api?: unknown; compat?: { supportsStore?: boolean } },
  forceStore: boolean,
): boolean {
  if (forceStore) {
    return false;
  }
  if (typeof model.api !== "string") {
    return false;
  }
  return OPENAI_RESPONSES_APIS.has(model.api) && model.compat?.supportsStore === false;
}

function applyOpenAIResponsesPayloadOverrides(params: {
  payloadObj: Record<string, unknown>;
  forceStore: boolean;
  stripStore: boolean;
  useServerCompaction: boolean;
  compactThreshold: number;
}): void {
  if (params.forceStore) {
    params.payloadObj.store = true;
  }
  if (params.stripStore) {
    delete params.payloadObj.store;
  }
  if (params.useServerCompaction && params.payloadObj.context_management === undefined) {
    params.payloadObj.context_management = [
      {
        type: "compaction",
        compact_threshold: params.compactThreshold,
      },
    ];
  }
}

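// Illustrative payload rewrite (hypothetical model name and threshold):
//   before: { model: "gpt-5", store: false }
//   after applyOpenAIResponsesPayloadOverrides with forceStore=true,
//   useServerCompaction=true, compactThreshold=140_000:
//     { model: "gpt-5", store: true,
//       context_management: [{ type: "compaction", compact_threshold: 140_000 }] }
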
function createOpenAIResponsesContextManagementWrapper(
  baseStreamFn: StreamFn | undefined,
  extraParams: Record<string, unknown> | undefined,
): StreamFn {
  const underlying = baseStreamFn ?? streamSimple;
  return (model, context, options) => {
    const forceStore = shouldForceResponsesStore(model);
    const useServerCompaction = shouldEnableOpenAIResponsesServerCompaction(model, extraParams);
    // Strip `store` from the payload when the model declares supportsStore=false.
    // pi-ai upstream hardcodes `store: false` for Responses API; strict
    // OpenAI-compatible endpoints (e.g. Gemini via Cloudflare) reject it.
    const stripStore = shouldStripResponsesStore(model, forceStore);
    if (!forceStore && !useServerCompaction && !stripStore) {
      return underlying(model, context, options);
    }

    const compactThreshold =
      parsePositiveInteger(extraParams?.responsesCompactThreshold) ??
      resolveOpenAIResponsesCompactThreshold(model);
    const originalOnPayload = options?.onPayload;
    return underlying(model, context, {
      ...options,
      onPayload: (payload) => {
        if (payload && typeof payload === "object") {
          applyOpenAIResponsesPayloadOverrides({
            payloadObj: payload as Record<string, unknown>,
            forceStore,
            stripStore,
            useServerCompaction,
            compactThreshold,
          });
        }
        originalOnPayload?.(payload);
      },
    });
  };
}

function normalizeOpenAIServiceTier(value: unknown): OpenAIServiceTier | undefined {
  if (typeof value !== "string") {
    return undefined;
  }
  const normalized = value.trim().toLowerCase();
  if (
    normalized === "auto" ||
    normalized === "default" ||
    normalized === "flex" ||
    normalized === "priority"
  ) {
    return normalized;
  }
  return undefined;
}

function resolveOpenAIServiceTier(
  extraParams: Record<string, unknown> | undefined,
): OpenAIServiceTier | undefined {
  const raw = extraParams?.serviceTier ?? extraParams?.service_tier;
  const normalized = normalizeOpenAIServiceTier(raw);
  if (raw !== undefined && normalized === undefined) {
    const rawSummary = typeof raw === "string" ? raw : typeof raw;
    log.warn(`ignoring invalid OpenAI service tier param: ${rawSummary}`);
  }
  return normalized;
}

function createOpenAIServiceTierWrapper(
  baseStreamFn: StreamFn | undefined,
  serviceTier: OpenAIServiceTier,
): StreamFn {
  const underlying = baseStreamFn ?? streamSimple;
  return (model, context, options) => {
    if (
      model.api !== "openai-responses" ||
      model.provider !== "openai" ||
      !isOpenAIPublicApiBaseUrl(model.baseUrl)
    ) {
      return underlying(model, context, options);
    }
    const originalOnPayload = options?.onPayload;
    return underlying(model, context, {
      ...options,
      onPayload: (payload) => {
        if (payload && typeof payload === "object") {
          const payloadObj = payload as Record<string, unknown>;
          if (payloadObj.service_tier === undefined) {
            payloadObj.service_tier = serviceTier;
          }
        }
        originalOnPayload?.(payload);
      },
    });
  };
}

function createCodexDefaultTransportWrapper(baseStreamFn: StreamFn | undefined): StreamFn {
  const underlying = baseStreamFn ?? streamSimple;
  return (model, context, options) =>
    underlying(model, context, {
      ...options,
      transport: options?.transport ?? "auto",
    });
}

function createOpenAIDefaultTransportWrapper(baseStreamFn: StreamFn | undefined): StreamFn {
  const underlying = baseStreamFn ?? streamSimple;
  return (model, context, options) => {
    const typedOptions = options as
      | (SimpleStreamOptions & { openaiWsWarmup?: boolean })
      | undefined;
    const mergedOptions = {
      ...options,
      transport: options?.transport ?? "auto",
      // Warm-up is optional in OpenAI docs; enabled by default here for lower
      // first-turn latency on WebSocket sessions. Set params.openaiWsWarmup=false
      // to disable per model.
      openaiWsWarmup: typedOptions?.openaiWsWarmup ?? true,
    } as SimpleStreamOptions;
    return underlying(model, context, mergedOptions);
  };
}

function isAnthropic1MModel(modelId: string): boolean {
  const normalized = modelId.trim().toLowerCase();
  return ANTHROPIC_1M_MODEL_PREFIXES.some((prefix) => normalized.startsWith(prefix));
}

function parseHeaderList(value: unknown): string[] {
  if (typeof value !== "string") {
    return [];
  }
  return value
    .split(",")
    .map((item) => item.trim())
    .filter(Boolean);
}

function resolveAnthropicBetas(
  extraParams: Record<string, unknown> | undefined,
  provider: string,
  modelId: string,
): string[] | undefined {
  if (provider !== "anthropic") {
    return undefined;
  }

  const betas = new Set<string>();
  const configured = extraParams?.anthropicBeta;
  if (typeof configured === "string" && configured.trim()) {
    betas.add(configured.trim());
  } else if (Array.isArray(configured)) {
    for (const beta of configured) {
      if (typeof beta === "string" && beta.trim()) {
        betas.add(beta.trim());
      }
    }
  }

  if (extraParams?.context1m === true) {
    if (isAnthropic1MModel(modelId)) {
      betas.add(ANTHROPIC_CONTEXT_1M_BETA);
    } else {
      log.warn(`ignoring context1m for non-opus/sonnet model: ${provider}/${modelId}`);
    }
  }

  return betas.size > 0 ? [...betas] : undefined;
}

function mergeAnthropicBetaHeader(
  headers: Record<string, string> | undefined,
  betas: string[],
): Record<string, string> {
  const merged = { ...headers };
  const existingKey = Object.keys(merged).find((key) => key.toLowerCase() === "anthropic-beta");
  const existing = existingKey ? parseHeaderList(merged[existingKey]) : [];
  const values = Array.from(new Set([...existing, ...betas]));
  const key = existingKey ?? "anthropic-beta";
  merged[key] = values.join(",");
  return merged;
}

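// Illustrative merge (placeholder beta names): existing header casing is kept and
// values are deduped in first-seen order:
//   mergeAnthropicBetaHeader({ "Anthropic-Beta": "a,b" }, ["b", "c"])
//   // => { "Anthropic-Beta": "a,b,c" }
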
// Betas that pi-ai's createClient injects for standard Anthropic API key calls.
// Must be included when injecting anthropic-beta via options.headers, because
// pi-ai's mergeHeaders uses Object.assign (last-wins), which would otherwise
// overwrite the hardcoded defaultHeaders["anthropic-beta"].
const PI_AI_DEFAULT_ANTHROPIC_BETAS = [
  "fine-grained-tool-streaming-2025-05-14",
  "interleaved-thinking-2025-05-14",
] as const;

// Additional betas pi-ai injects when the API key is an OAuth token (sk-ant-oat-*).
// These are required for Anthropic to accept OAuth Bearer auth. Losing oauth-2025-04-20
// causes a 401 "OAuth authentication is currently not supported".
const PI_AI_OAUTH_ANTHROPIC_BETAS = [
  "claude-code-20250219",
  "oauth-2025-04-20",
  ...PI_AI_DEFAULT_ANTHROPIC_BETAS,
] as const;

function isAnthropicOAuthApiKey(apiKey: unknown): boolean {
  return typeof apiKey === "string" && apiKey.includes("sk-ant-oat");
}

function createAnthropicBetaHeadersWrapper(
  baseStreamFn: StreamFn | undefined,
  betas: string[],
): StreamFn {
  const underlying = baseStreamFn ?? streamSimple;
  return (model, context, options) => {
    const isOauth = isAnthropicOAuthApiKey(options?.apiKey);
    const requestedContext1m = betas.includes(ANTHROPIC_CONTEXT_1M_BETA);
    const effectiveBetas =
      isOauth && requestedContext1m
        ? betas.filter((beta) => beta !== ANTHROPIC_CONTEXT_1M_BETA)
        : betas;
    if (isOauth && requestedContext1m) {
      log.warn(
        `ignoring context1m for OAuth token auth on ${model.provider}/${model.id}; Anthropic rejects context-1m beta with OAuth auth`,
      );
    }

    // Preserve the betas pi-ai's createClient would inject for the given token type.
    // Without this, our options.headers["anthropic-beta"] overwrites the pi-ai
    // defaultHeaders via Object.assign, stripping critical betas like oauth-2025-04-20.
    const piAiBetas = isOauth
      ? (PI_AI_OAUTH_ANTHROPIC_BETAS as readonly string[])
      : (PI_AI_DEFAULT_ANTHROPIC_BETAS as readonly string[]);
    const allBetas = [...new Set([...piAiBetas, ...effectiveBetas])];
    return underlying(model, context, {
      ...options,
      headers: mergeAnthropicBetaHeader(options?.headers, allBetas),
    });
  };
}

function isOpenRouterAnthropicModel(provider: string, modelId: string): boolean {
  return provider.toLowerCase() === "openrouter" && modelId.toLowerCase().startsWith("anthropic/");
}

type PayloadMessage = {
  role?: string;
  content?: unknown;
};

/**
 * Inject cache_control into the system message for OpenRouter Anthropic models.
 * OpenRouter passes through Anthropic's cache_control field — caching the system
 * prompt avoids re-processing it on every request.
 */
function createOpenRouterSystemCacheWrapper(baseStreamFn: StreamFn | undefined): StreamFn {
  const underlying = baseStreamFn ?? streamSimple;
  return (model, context, options) => {
    if (
      typeof model.provider !== "string" ||
      typeof model.id !== "string" ||
      !isOpenRouterAnthropicModel(model.provider, model.id)
    ) {
      return underlying(model, context, options);
    }

    const originalOnPayload = options?.onPayload;
    return underlying(model, context, {
      ...options,
      onPayload: (payload) => {
        const messages = (payload as Record<string, unknown>)?.messages;
        if (Array.isArray(messages)) {
          for (const msg of messages as PayloadMessage[]) {
            if (msg.role !== "system" && msg.role !== "developer") {
              continue;
            }
            if (typeof msg.content === "string") {
              msg.content = [
                { type: "text", text: msg.content, cache_control: { type: "ephemeral" } },
              ];
            } else if (Array.isArray(msg.content) && msg.content.length > 0) {
              const last = msg.content[msg.content.length - 1];
              if (last && typeof last === "object") {
                (last as Record<string, unknown>).cache_control = { type: "ephemeral" };
              }
            }
          }
        }
        originalOnPayload?.(payload);
      },
    });
  };
}

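// Illustrative rewrite (abbreviated system prompt): a string system message
//   { role: "system", content: "You are OpenClaw..." }
// becomes a single cached text block:
//   { role: "system", content: [{ type: "text", text: "You are OpenClaw...",
//     cache_control: { type: "ephemeral" } }] }
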
/**
 * Map OpenClaw's ThinkLevel to OpenRouter's reasoning.effort values.
 * "off" maps to "none"; "adaptive" maps to "medium"; all other levels pass
 * through as-is.
 */
function mapThinkingLevelToOpenRouterReasoningEffort(
  thinkingLevel: ThinkLevel,
): "none" | "minimal" | "low" | "medium" | "high" | "xhigh" {
  if (thinkingLevel === "off") {
    return "none";
  }
  if (thinkingLevel === "adaptive") {
    return "medium";
  }
  return thinkingLevel;
}

function shouldApplySiliconFlowThinkingOffCompat(params: {
  provider: string;
  modelId: string;
  thinkingLevel?: ThinkLevel;
}): boolean {
  return (
    params.provider === "siliconflow" &&
    params.thinkingLevel === "off" &&
    params.modelId.startsWith("Pro/")
  );
}

/**
 * SiliconFlow's Pro/* models reject string thinking modes (including "off")
 * with HTTP 400 invalid-parameter errors. Normalize to `thinking: null` to
 * preserve "thinking disabled" intent without sending an invalid enum value.
 */
function createSiliconFlowThinkingWrapper(baseStreamFn: StreamFn | undefined): StreamFn {
  const underlying = baseStreamFn ?? streamSimple;
  return (model, context, options) => {
    const originalOnPayload = options?.onPayload;
    return underlying(model, context, {
      ...options,
      onPayload: (payload) => {
        if (payload && typeof payload === "object") {
          const payloadObj = payload as Record<string, unknown>;
          if (payloadObj.thinking === "off") {
            payloadObj.thinking = null;
          }
        }
        originalOnPayload?.(payload);
      },
    });
  };
}

type MoonshotThinkingType = "enabled" | "disabled";

function normalizeMoonshotThinkingType(value: unknown): MoonshotThinkingType | undefined {
  if (typeof value === "boolean") {
    return value ? "enabled" : "disabled";
  }
  if (typeof value === "string") {
    const normalized = value.trim().toLowerCase();
    if (
      normalized === "enabled" ||
      normalized === "enable" ||
      normalized === "on" ||
      normalized === "true"
    ) {
      return "enabled";
    }
    if (
      normalized === "disabled" ||
      normalized === "disable" ||
      normalized === "off" ||
      normalized === "false"
    ) {
      return "disabled";
    }
    return undefined;
  }
  if (value && typeof value === "object" && !Array.isArray(value)) {
    const typeValue = (value as Record<string, unknown>).type;
    return normalizeMoonshotThinkingType(typeValue);
  }
  return undefined;
}

function resolveMoonshotThinkingType(params: {
  configuredThinking: unknown;
  thinkingLevel?: ThinkLevel;
}): MoonshotThinkingType | undefined {
  const configured = normalizeMoonshotThinkingType(params.configuredThinking);
  if (configured) {
    return configured;
  }
  if (!params.thinkingLevel) {
    return undefined;
  }
  return params.thinkingLevel === "off" ? "disabled" : "enabled";
}

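// Illustrative resolution order (hypothetical inputs): explicit config beats the
// session thinking level, which is only consulted as a fallback:
//   resolveMoonshotThinkingType({ configuredThinking: { type: "enable" }, thinkingLevel: "off" }) // => "enabled"
//   resolveMoonshotThinkingType({ configuredThinking: undefined, thinkingLevel: "low" })          // => "enabled"
//   resolveMoonshotThinkingType({ configuredThinking: undefined, thinkingLevel: undefined })      // => undefined
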
function isMoonshotToolChoiceCompatible(toolChoice: unknown): boolean {
  if (toolChoice == null) {
    return true;
  }
  if (toolChoice === "auto" || toolChoice === "none") {
    return true;
  }
  if (typeof toolChoice === "object" && !Array.isArray(toolChoice)) {
    const typeValue = (toolChoice as Record<string, unknown>).type;
    return typeValue === "auto" || typeValue === "none";
  }
  return false;
}

/**
 * Moonshot Kimi supports a native binary thinking mode:
 * - { thinking: { type: "enabled" } }
 * - { thinking: { type: "disabled" } }
 *
 * When thinking is enabled, Moonshot only accepts tool_choice auto|none.
 * Normalize incompatible values to auto instead of failing the request.
 */
function createMoonshotThinkingWrapper(
  baseStreamFn: StreamFn | undefined,
  thinkingType?: MoonshotThinkingType,
): StreamFn {
  const underlying = baseStreamFn ?? streamSimple;
  return (model, context, options) => {
    const originalOnPayload = options?.onPayload;
    return underlying(model, context, {
      ...options,
      onPayload: (payload) => {
        if (payload && typeof payload === "object") {
          const payloadObj = payload as Record<string, unknown>;
          let effectiveThinkingType = normalizeMoonshotThinkingType(payloadObj.thinking);

          if (thinkingType) {
            payloadObj.thinking = { type: thinkingType };
            effectiveThinkingType = thinkingType;
          }

          if (
            effectiveThinkingType === "enabled" &&
            !isMoonshotToolChoiceCompatible(payloadObj.tool_choice)
          ) {
            payloadObj.tool_choice = "auto";
          }
        }
        originalOnPayload?.(payload);
      },
    });
  };
}

function isKimiCodingAnthropicEndpoint(model: {
  api?: unknown;
  provider?: unknown;
  baseUrl?: unknown;
}): boolean {
  if (model.api !== "anthropic-messages") {
    return false;
  }

  if (typeof model.provider === "string" && model.provider.trim().toLowerCase() === "kimi-coding") {
    return true;
  }

  if (typeof model.baseUrl !== "string" || !model.baseUrl.trim()) {
    return false;
  }

  try {
    const parsed = new URL(model.baseUrl);
    const host = parsed.hostname.toLowerCase();
    const pathname = parsed.pathname.toLowerCase();
    return host.endsWith("kimi.com") && pathname.startsWith("/coding");
  } catch {
    const normalized = model.baseUrl.toLowerCase();
    return normalized.includes("kimi.com/coding");
  }
}

function normalizeKimiCodingToolDefinition(tool: unknown): Record<string, unknown> | undefined {
  if (!tool || typeof tool !== "object" || Array.isArray(tool)) {
    return undefined;
  }

  const toolObj = tool as Record<string, unknown>;
  if (toolObj.function && typeof toolObj.function === "object") {
    return toolObj;
  }

  const rawName = typeof toolObj.name === "string" ? toolObj.name.trim() : "";
  if (!rawName) {
    return toolObj;
  }

  const functionSpec: Record<string, unknown> = {
    name: rawName,
    parameters:
      toolObj.input_schema && typeof toolObj.input_schema === "object"
        ? toolObj.input_schema
        : toolObj.parameters && typeof toolObj.parameters === "object"
          ? toolObj.parameters
          : { type: "object", properties: {} },
  };

  if (typeof toolObj.description === "string" && toolObj.description.trim()) {
    functionSpec.description = toolObj.description;
  }
  if (typeof toolObj.strict === "boolean") {
    functionSpec.strict = toolObj.strict;
  }

  return {
    type: "function",
    function: functionSpec,
  };
}

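// Illustrative conversion (hypothetical tool schema): an Anthropic-style tool
//   { name: "read_file", description: "Read a file",
//     input_schema: { type: "object", properties: { path: { type: "string" } } } }
// is reshaped to the OpenAI-style definition Kimi Coding expects:
//   { type: "function", function: { name: "read_file", description: "Read a file",
//     parameters: { type: "object", properties: { path: { type: "string" } } } } }
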
function normalizeKimiCodingToolChoice(toolChoice: unknown): unknown {
  if (!toolChoice || typeof toolChoice !== "object" || Array.isArray(toolChoice)) {
    return toolChoice;
  }

  const choice = toolChoice as Record<string, unknown>;
  if (choice.type === "auto") {
    return "auto";
  }
  if (choice.type === "none") {
    return "none";
  }
  if (choice.type === "required") {
    return "required";
  }
  if (choice.type === "any") {
    return "required";
  }
  if (choice.type === "tool" && typeof choice.name === "string" && choice.name.trim()) {
    return {
      type: "function",
      function: { name: choice.name.trim() },
    };
  }

  return toolChoice;
}

/**
 * Kimi Coding's anthropic-messages endpoint expects OpenAI-style tool payloads
 * (`tools[].function`) even when messages use Anthropic request framing.
 */
function createKimiCodingAnthropicToolSchemaWrapper(baseStreamFn: StreamFn | undefined): StreamFn {
  const underlying = baseStreamFn ?? streamSimple;
  return (model, context, options) => {
    const originalOnPayload = options?.onPayload;
    return underlying(model, context, {
      ...options,
      onPayload: (payload) => {
        if (payload && typeof payload === "object" && isKimiCodingAnthropicEndpoint(model)) {
          const payloadObj = payload as Record<string, unknown>;
          if (Array.isArray(payloadObj.tools)) {
            payloadObj.tools = payloadObj.tools
              .map((tool) => normalizeKimiCodingToolDefinition(tool))
              .filter((tool): tool is Record<string, unknown> => !!tool);
          }
          payloadObj.tool_choice = normalizeKimiCodingToolChoice(payloadObj.tool_choice);
        }
        originalOnPayload?.(payload);
      },
    });
  };
}

/**
 * Normalize reasoning params for OpenRouter-compatible proxy payloads:
 * strip the flat reasoning_effort field and, when a thinking level is
 * configured, inject the nested reasoning.effort shape.
 */
function normalizeProxyReasoningPayload(payload: unknown, thinkingLevel?: ThinkLevel): void {
  if (!payload || typeof payload !== "object") {
    return;
  }

  const payloadObj = payload as Record<string, unknown>;

  // pi-ai may inject a top-level reasoning_effort (OpenAI flat format).
  // OpenRouter-compatible proxy gateways expect the nested reasoning.effort
  // shape instead, and some models reject the flat field outright.
  delete payloadObj.reasoning_effort;

  // When thinking is "off", or provider/model guards disable injection,
  // leave reasoning unset after normalizing away the legacy flat field.
  if (!thinkingLevel || thinkingLevel === "off") {
    return;
  }

  const existingReasoning = payloadObj.reasoning;

  // OpenRouter treats reasoning.effort and reasoning.max_tokens as
  // alternative controls. If max_tokens is already present, do not inject
  // effort and do not overwrite caller-supplied reasoning.
  if (
    existingReasoning &&
    typeof existingReasoning === "object" &&
    !Array.isArray(existingReasoning)
  ) {
    const reasoningObj = existingReasoning as Record<string, unknown>;
    if (!("max_tokens" in reasoningObj) && !("effort" in reasoningObj)) {
      reasoningObj.effort = mapThinkingLevelToOpenRouterReasoningEffort(thinkingLevel);
    }
  } else if (!existingReasoning) {
    payloadObj.reasoning = {
      effort: mapThinkingLevelToOpenRouterReasoningEffort(thinkingLevel),
    };
  }
}

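// Illustrative normalizations (hypothetical payloads), with thinkingLevel="high":
//   { reasoning_effort: "high" }        // => { reasoning: { effort: "high" } }
//   { reasoning: { max_tokens: 2048 } } // => unchanged: max_tokens wins, flat field stripped
// and with thinkingLevel="off":
//   { reasoning_effort: "none" }        // => {} (flat field stripped, nothing injected)
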
/**
 * Create a streamFn wrapper that adds OpenRouter app attribution headers
 * and injects reasoning.effort based on the configured thinking level.
 */
function createOpenRouterWrapper(
  baseStreamFn: StreamFn | undefined,
  thinkingLevel?: ThinkLevel,
): StreamFn {
  const underlying = baseStreamFn ?? streamSimple;
  return (model, context, options) => {
    const onPayload = options?.onPayload;
    return underlying(model, context, {
      ...options,
      headers: {
        ...OPENROUTER_APP_HEADERS,
        ...options?.headers,
      },
      onPayload: (payload) => {
        normalizeProxyReasoningPayload(payload, thinkingLevel);
        onPayload?.(payload);
      },
    });
  };
}

/**
 * Models on OpenRouter-style proxy providers that reject `reasoning.effort`.
 */
function isProxyReasoningUnsupported(modelId: string): boolean {
  const id = modelId.toLowerCase();
  return id.startsWith("x-ai/");
}

/**
 * Create a streamFn wrapper that adds the Kilocode feature attribution header
 * and injects reasoning.effort based on the configured thinking level.
 *
 * The Kilocode provider gateway manages provider-specific quirks (e.g. cache
 * control) server-side, so we only handle header injection and reasoning here.
 */
function createKilocodeWrapper(
  baseStreamFn: StreamFn | undefined,
  thinkingLevel?: ThinkLevel,
): StreamFn {
  const underlying = baseStreamFn ?? streamSimple;
  return (model, context, options) => {
    const onPayload = options?.onPayload;
    return underlying(model, context, {
      ...options,
      headers: {
        ...options?.headers,
        ...resolveKilocodeAppHeaders(),
      },
      onPayload: (payload) => {
        normalizeProxyReasoningPayload(payload, thinkingLevel);
        onPayload?.(payload);
      },
    });
  };
}

function isGemini31Model(modelId: string): boolean {
  const normalized = modelId.toLowerCase();
  return normalized.includes("gemini-3.1-pro") || normalized.includes("gemini-3.1-flash");
}

function mapThinkLevelToGoogleThinkingLevel(
  thinkingLevel: ThinkLevel,
): "MINIMAL" | "LOW" | "MEDIUM" | "HIGH" | undefined {
  switch (thinkingLevel) {
    case "minimal":
      return "MINIMAL";
    case "low":
      return "LOW";
    case "medium":
    case "adaptive":
      return "MEDIUM";
    case "high":
    case "xhigh":
      return "HIGH";
    default:
      return undefined;
  }
}

function sanitizeGoogleThinkingPayload(params: {
  payload: unknown;
  modelId?: string;
  thinkingLevel?: ThinkLevel;
}): void {
  if (!params.payload || typeof params.payload !== "object") {
    return;
  }
  const payloadObj = params.payload as Record<string, unknown>;
  const config = payloadObj.config;
  if (!config || typeof config !== "object") {
    return;
  }
  const configObj = config as Record<string, unknown>;
  const thinkingConfig = configObj.thinkingConfig;
  if (!thinkingConfig || typeof thinkingConfig !== "object") {
    return;
  }
  const thinkingConfigObj = thinkingConfig as Record<string, unknown>;
  const thinkingBudget = thinkingConfigObj.thinkingBudget;
  if (typeof thinkingBudget !== "number" || thinkingBudget >= 0) {
    return;
  }

  // pi-ai can emit thinkingBudget=-1 for some Gemini 3.1 IDs; a negative budget
  // is invalid for Google-compatible backends and can lead to malformed handling.
  delete thinkingConfigObj.thinkingBudget;

  if (
    typeof params.modelId === "string" &&
    isGemini31Model(params.modelId) &&
    params.thinkingLevel &&
    params.thinkingLevel !== "off" &&
    thinkingConfigObj.thinkingLevel === undefined
  ) {
    const mappedLevel = mapThinkLevelToGoogleThinkingLevel(params.thinkingLevel);
    if (mappedLevel) {
      thinkingConfigObj.thinkingLevel = mappedLevel;
    }
  }
}

function createGoogleThinkingPayloadWrapper(
  baseStreamFn: StreamFn | undefined,
  thinkingLevel?: ThinkLevel,
): StreamFn {
  const underlying = baseStreamFn ?? streamSimple;
  return (model, context, options) => {
    const onPayload = options?.onPayload;
    return underlying(model, context, {
      ...options,
      onPayload: (payload) => {
        if (model.api === "google-generative-ai") {
          sanitizeGoogleThinkingPayload({
            payload,
            modelId: model.id,
            thinkingLevel,
          });
        }
        onPayload?.(payload);
      },
    });
  };
}

/**
 * Create a streamFn wrapper that injects tool_stream=true for Z.AI providers.
 *
 * Z.AI's API supports the `tool_stream` parameter to enable real-time streaming
 * of tool call arguments and reasoning content. When enabled, the API returns
 * progressive tool_call deltas, allowing users to see tool execution in real time.
 *
 * @see https://docs.z.ai/api-reference#streaming
 */
function createZaiToolStreamWrapper(
  baseStreamFn: StreamFn | undefined,
  enabled: boolean,
): StreamFn {
  const underlying = baseStreamFn ?? streamSimple;
  return (model, context, options) => {
    if (!enabled) {
      return underlying(model, context, options);
    }

    const originalOnPayload = options?.onPayload;
    return underlying(model, context, {
      ...options,
      onPayload: (payload) => {
        if (payload && typeof payload === "object") {
          // Inject tool_stream: true for the Z.AI API
          (payload as Record<string, unknown>).tool_stream = true;
        }
        originalOnPayload?.(payload);
      },
    });
  };
}

function resolveAliasedParamValue(
  sources: Array<Record<string, unknown> | undefined>,
  snakeCaseKey: string,
  camelCaseKey: string,
): unknown {
  let resolved: unknown = undefined;
  let seen = false;
  for (const source of sources) {
    if (!source) {
      continue;
    }
    const hasSnakeCaseKey = Object.hasOwn(source, snakeCaseKey);
    const hasCamelCaseKey = Object.hasOwn(source, camelCaseKey);
    if (!hasSnakeCaseKey && !hasCamelCaseKey) {
      continue;
    }
    resolved = hasSnakeCaseKey ? source[snakeCaseKey] : source[camelCaseKey];
    seen = true;
  }
  return seen ? resolved : undefined;
}

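// Illustrative resolution (placeholder sources): later sources win, and within a
// source the snake_case key shadows the camelCase alias:
//   resolveAliasedParamValue(
//     [{ parallelToolCalls: true }, { parallel_tool_calls: false }],
//     "parallel_tool_calls",
//     "parallelToolCalls",
//   ) // => false
//   resolveAliasedParamValue(
//     [{ parallel_tool_calls: null, parallelToolCalls: true }],
//     "parallel_tool_calls",
//     "parallelToolCalls",
//   ) // => null
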
function createParallelToolCallsWrapper(
  baseStreamFn: StreamFn | undefined,
  enabled: boolean,
): StreamFn {
  const underlying = baseStreamFn ?? streamSimple;
  return (model, context, options) => {
    if (model.api !== "openai-completions" && model.api !== "openai-responses") {
      return underlying(model, context, options);
    }
    log.debug(
      `applying parallel_tool_calls=${enabled} for ${model.provider ?? "unknown"}/${model.id ?? "unknown"} api=${model.api}`,
    );
    const originalOnPayload = options?.onPayload;
    return underlying(model, context, {
      ...options,
      onPayload: (payload) => {
        if (payload && typeof payload === "object") {
          (payload as Record<string, unknown>).parallel_tool_calls = enabled;
        }
        originalOnPayload?.(payload);
      },
    });
  };
}

/**
 * Apply extra params (like temperature) to an agent's streamFn, and add
 * OpenRouter app attribution headers when using the OpenRouter provider.
 *
 * @internal Exported for testing
 */
export function applyExtraParamsToAgent(
  agent: { streamFn?: StreamFn },
  cfg: OpenClawConfig | undefined,
  provider: string,
  modelId: string,
  extraParamsOverride?: Record<string, unknown>,
  thinkingLevel?: ThinkLevel,
  agentId?: string,
): void {
  const resolvedExtraParams = resolveExtraParams({
    cfg,
    provider,
    modelId,
    agentId,
  });
  if (provider === "openai-codex") {
    // Default Codex to WebSocket-first when nothing else specifies transport.
    agent.streamFn = createCodexDefaultTransportWrapper(agent.streamFn);
  } else if (provider === "openai") {
    // Default OpenAI Responses to WebSocket-first with transparent SSE fallback.
    agent.streamFn = createOpenAIDefaultTransportWrapper(agent.streamFn);
  }
  const override =
    extraParamsOverride && Object.keys(extraParamsOverride).length > 0
      ? Object.fromEntries(
          Object.entries(extraParamsOverride).filter(([, value]) => value !== undefined),
        )
      : undefined;
  const merged = Object.assign({}, resolvedExtraParams, override);
  const wrappedStreamFn = createStreamFnWithExtraParams(agent.streamFn, merged, provider);

  if (wrappedStreamFn) {
    log.debug(`applying extraParams to agent streamFn for ${provider}/${modelId}`);
    agent.streamFn = wrappedStreamFn;
  }

  const anthropicBetas = resolveAnthropicBetas(merged, provider, modelId);
  if (anthropicBetas?.length) {
    log.debug(
      `applying Anthropic beta header for ${provider}/${modelId}: ${anthropicBetas.join(",")}`,
    );
    agent.streamFn = createAnthropicBetaHeadersWrapper(agent.streamFn, anthropicBetas);
  }

  if (shouldApplySiliconFlowThinkingOffCompat({ provider, modelId, thinkingLevel })) {
    log.debug(
      `normalizing thinking=off to thinking=null for SiliconFlow compatibility (${provider}/${modelId})`,
    );
    agent.streamFn = createSiliconFlowThinkingWrapper(agent.streamFn);
  }

  if (provider === "moonshot") {
    const moonshotThinkingType = resolveMoonshotThinkingType({
      configuredThinking: merged?.thinking,
      thinkingLevel,
    });
    if (moonshotThinkingType) {
      log.debug(
        `applying Moonshot thinking=${moonshotThinkingType} payload wrapper for ${provider}/${modelId}`,
      );
    }
    agent.streamFn = createMoonshotThinkingWrapper(agent.streamFn, moonshotThinkingType);
  }

  agent.streamFn = createKimiCodingAnthropicToolSchemaWrapper(agent.streamFn);

  if (provider === "openrouter") {
    log.debug(`applying OpenRouter app attribution headers for ${provider}/${modelId}`);
    // "auto" is a dynamic routing model — we don't know which underlying model
    // OpenRouter will select, and it may be a reasoning-required endpoint.
    // Omit the thinkingLevel so we never inject `reasoning.effort: "none"`,
    // which would cause a 400 on models where reasoning is mandatory.
    // Users who need reasoning control should target a specific model ID.
    // See: openclaw/openclaw#24851
    //
    // x-ai/grok models do not support OpenRouter's reasoning.effort parameter
    // and reject payloads containing it with "Invalid arguments passed to the
    // model." Skip reasoning injection for these models.
    // See: openclaw/openclaw#32039
    const skipReasoningInjection = modelId === "auto" || isProxyReasoningUnsupported(modelId);
    const openRouterThinkingLevel = skipReasoningInjection ? undefined : thinkingLevel;
    agent.streamFn = createOpenRouterWrapper(agent.streamFn, openRouterThinkingLevel);
    agent.streamFn = createOpenRouterSystemCacheWrapper(agent.streamFn);
  }

  if (provider === "kilocode") {
    log.debug(`applying Kilocode feature header for ${provider}/${modelId}`);
    // kilo/auto is a dynamic routing model — skip reasoning injection
    // (same rationale as OpenRouter "auto"). See: openclaw/openclaw#24851
    // Also skip for models known to reject reasoning.effort (e.g. x-ai/*).
    const kilocodeThinkingLevel =
      modelId === "kilo/auto" || isProxyReasoningUnsupported(modelId) ? undefined : thinkingLevel;
    agent.streamFn = createKilocodeWrapper(agent.streamFn, kilocodeThinkingLevel);
  }

  if (provider === "amazon-bedrock" && !isAnthropicBedrockModel(modelId)) {
    log.debug(`disabling prompt caching for non-Anthropic Bedrock model ${provider}/${modelId}`);
    agent.streamFn = createBedrockNoCacheWrapper(agent.streamFn);
  }

  // Enable Z.AI tool_stream for real-time tool call streaming.
  // Enabled by default for the Z.AI provider; can be disabled via params.tool_stream: false.
  if (provider === "zai" || provider === "z-ai") {
    const toolStreamEnabled = merged?.tool_stream !== false;
    if (toolStreamEnabled) {
      log.debug(`enabling Z.AI tool_stream for ${provider}/${modelId}`);
      agent.streamFn = createZaiToolStreamWrapper(agent.streamFn, true);
    }
  }

  // Guard Google payloads against invalid negative thinking budgets emitted by
  // upstream model-ID heuristics for Gemini 3.1 variants.
  agent.streamFn = createGoogleThinkingPayloadWrapper(agent.streamFn, thinkingLevel);

  const openAIServiceTier = resolveOpenAIServiceTier(merged);
  if (openAIServiceTier) {
    log.debug(`applying OpenAI service_tier=${openAIServiceTier} for ${provider}/${modelId}`);
    agent.streamFn = createOpenAIServiceTierWrapper(agent.streamFn, openAIServiceTier);
  }

  // Work around upstream pi-ai hardcoding `store: false` for the Responses API.
  // Force `store=true` for direct OpenAI Responses models and auto-enable
  // server-side compaction for compatible OpenAI Responses payloads.
  agent.streamFn = createOpenAIResponsesContextManagementWrapper(agent.streamFn, merged);

  const rawParallelToolCalls = resolveAliasedParamValue(
    [resolvedExtraParams, override],
    "parallel_tool_calls",
    "parallelToolCalls",
  );
  if (rawParallelToolCalls !== undefined) {
    if (typeof rawParallelToolCalls === "boolean") {
      agent.streamFn = createParallelToolCallsWrapper(agent.streamFn, rawParallelToolCalls);
    } else if (rawParallelToolCalls === null) {
      log.debug("parallel_tool_calls suppressed by null override, skipping injection");
    } else {
      const summary =
        typeof rawParallelToolCalls === "string"
          ? rawParallelToolCalls
          : typeof rawParallelToolCalls;
      log.warn(`ignoring invalid parallel_tool_calls param: ${summary}`);
    }
  }
}
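
// Minimal usage sketch (assumed call site; cfg comes from the caller):
//   const agent: { streamFn?: StreamFn } = {};
//   applyExtraParamsToAgent(agent, cfg, "openrouter", "anthropic/claude-sonnet-4", undefined, "medium");
//   // agent.streamFn now chains over streamSimple: OpenRouter attribution headers,
//   // nested reasoning.effort="medium", system-prompt cache_control, plus the
//   // unconditional Kimi Coding and Google thinking-payload guards.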