fix(providers): make all models available in kilocode provider (#32352)

* kilocode: dynamic model discovery, kilo/auto default, cooldown exemption

- Replace 9-model hardcoded catalog with dynamic discovery from
  GET /api/gateway/models (Venice-like pattern with static fallback)
- Default model changed from anthropic/claude-opus-4.6 to kilo/auto
  (smart routing model)
- Add createKilocodeWrapper for X-KILOCODE-FEATURE header injection
  and reasoning.effort handling (skip for kilo/auto)
- Add kilocode to cooldown-exempt providers (proxy like OpenRouter)
- Keep sync buildKilocodeProvider for onboarding, add async
  buildKilocodeProviderWithDiscovery for implicit provider resolution
- Per-token gateway pricing converted to per-1M-token for cost fields

* kilocode: skip reasoning injection for x-ai models, harden discovery loop

* fix(kilocode): keep valid discovered duplicates (openclaw#32352, thanks @pandemicsyn)

* refactor(proxy): normalize reasoning payload guards (openclaw#32352, thanks @pandemicsyn)

* chore(changelog): note kilocode hardening (openclaw#32352, thanks @pandemicsyn and @vincentkoc)

* chore(changelog): fix kilocode note format (openclaw#32352, thanks @pandemicsyn and @vincentkoc)

* test(kilocode): support auto-model override cases (openclaw#32352, thanks @pandemicsyn)

* Update CHANGELOG.md

---------

Co-authored-by: Vincent Koc <vincentkoc@ieee.org>
This commit is contained in:
Florian Hines
2026-03-07 10:14:06 -06:00
committed by GitHub
parent 786ec21b5a
commit 33e7394861
15 changed files with 832 additions and 168 deletions

View File

@@ -9,6 +9,15 @@ const OPENROUTER_APP_HEADERS: Record<string, string> = {
"HTTP-Referer": "https://openclaw.ai",
"X-Title": "OpenClaw",
};
const KILOCODE_FEATURE_HEADER = "X-KILOCODE-FEATURE";
const KILOCODE_FEATURE_DEFAULT = "openclaw";
const KILOCODE_FEATURE_ENV_VAR = "KILOCODE_FEATURE";
/**
 * Build the Kilocode attribution header map.
 *
 * The feature tag is taken from the KILOCODE_FEATURE environment variable
 * (whitespace-trimmed); a blank or unset variable falls back to "openclaw".
 */
function resolveKilocodeAppHeaders(): Record<string, string> {
  const raw = process.env[KILOCODE_FEATURE_ENV_VAR];
  const trimmed = raw === undefined ? "" : raw.trim();
  const feature = trimmed.length > 0 ? trimmed : KILOCODE_FEATURE_DEFAULT;
  return { [KILOCODE_FEATURE_HEADER]: feature };
}
const ANTHROPIC_CONTEXT_1M_BETA = "context-1m-2025-08-07";
const ANTHROPIC_1M_MODEL_PREFIXES = ["claude-opus-4", "claude-sonnet-4"] as const;
// NOTE: We only force `store=true` for *direct* OpenAI Responses.
@@ -846,6 +855,45 @@ function createKimiCodingAnthropicToolSchemaWrapper(baseStreamFn: StreamFn | und
* Create a streamFn wrapper that adds OpenRouter app attribution headers
* and injects reasoning.effort based on the configured thinking level.
*/
/**
 * Normalize the reasoning fields on an outgoing proxy-gateway payload.
 *
 * Always strips the flat OpenAI-style `reasoning_effort` field, since
 * OpenRouter-compatible gateways expect the nested `reasoning.effort`
 * shape and some models reject the flat field outright. When a thinking
 * level is active, injects `reasoning.effort` — but never overwrites a
 * caller-supplied `effort` or `max_tokens` (OpenRouter treats those as
 * alternative controls).
 */
function normalizeProxyReasoningPayload(payload: unknown, thinkingLevel?: ThinkLevel): void {
  if (typeof payload !== "object" || payload === null) {
    return;
  }
  const body = payload as Record<string, unknown>;
  // Remove the legacy flat field unconditionally; only the nested shape
  // should ever reach the gateway.
  delete body.reasoning_effort;
  // With thinking off (or no level configured), stop here so each model
  // keeps its own default reasoning behavior.
  if (!thinkingLevel || thinkingLevel === "off") {
    return;
  }
  const current = body.reasoning;
  const isPlainObject =
    typeof current === "object" && current !== null && !Array.isArray(current);
  if (isPlainObject) {
    const reasoning = current as Record<string, unknown>;
    // Respect caller-chosen controls: inject effort only when neither
    // effort nor max_tokens is already present.
    if (!("max_tokens" in reasoning) && !("effort" in reasoning)) {
      reasoning.effort = mapThinkingLevelToOpenRouterReasoningEffort(thinkingLevel);
    }
  } else if (!current) {
    // No reasoning supplied at all: inject the mapped effort.
    body.reasoning = {
      effort: mapThinkingLevelToOpenRouterReasoningEffort(thinkingLevel),
    };
  }
}
function createOpenRouterWrapper(
baseStreamFn: StreamFn | undefined,
thinkingLevel?: ThinkLevel,
@@ -860,42 +908,7 @@ function createOpenRouterWrapper(
...options?.headers,
},
onPayload: (payload) => {
if (thinkingLevel && payload && typeof payload === "object") {
const payloadObj = payload as Record<string, unknown>;
// pi-ai may inject a top-level reasoning_effort (OpenAI flat format).
// OpenRouter expects the nested reasoning.effort format instead, and
// rejects payloads containing both fields. Remove the flat field so
// only the nested one is sent.
delete payloadObj.reasoning_effort;
// When thinking is "off", do not inject reasoning at all.
// Some models (e.g. deepseek/deepseek-r1) require reasoning and reject
// { effort: "none" } with "Reasoning is mandatory for this endpoint and
// cannot be disabled." Omitting the field lets each model use its own
// default reasoning behavior.
if (thinkingLevel !== "off") {
const existingReasoning = payloadObj.reasoning;
// OpenRouter treats reasoning.effort and reasoning.max_tokens as
// alternative controls. If max_tokens is already present, do not
// inject effort and do not overwrite caller-supplied reasoning.
if (
existingReasoning &&
typeof existingReasoning === "object" &&
!Array.isArray(existingReasoning)
) {
const reasoningObj = existingReasoning as Record<string, unknown>;
if (!("max_tokens" in reasoningObj) && !("effort" in reasoningObj)) {
reasoningObj.effort = mapThinkingLevelToOpenRouterReasoningEffort(thinkingLevel);
}
} else if (!existingReasoning) {
payloadObj.reasoning = {
effort: mapThinkingLevelToOpenRouterReasoningEffort(thinkingLevel),
};
}
}
}
normalizeProxyReasoningPayload(payload, thinkingLevel);
onPayload?.(payload);
},
});
@@ -903,14 +916,41 @@ function createOpenRouterWrapper(
}
/**
 * Models on OpenRouter-style proxy providers that reject `reasoning.effort`.
 * Injecting it causes "Invalid arguments passed to the model" errors.
 */
function isOpenRouterReasoningUnsupported(modelId: string): boolean {
function isProxyReasoningUnsupported(modelId: string): boolean {
  // Grok (x-ai/*) endpoints reject the reasoning.effort parameter.
  return modelId.toLowerCase().startsWith("x-ai/");
}
/**
* Create a streamFn wrapper that adds the Kilocode feature attribution header
* and injects reasoning.effort based on the configured thinking level.
*
* The Kilocode provider gateway manages provider-specific quirks (e.g. cache
* control) server-side, so we only handle header injection and reasoning here.
*/
function createKilocodeWrapper(
  baseStreamFn: StreamFn | undefined,
  thinkingLevel?: ThinkLevel,
): StreamFn {
  // Fall back to the plain streaming implementation when no base is given.
  const delegate = baseStreamFn ?? streamSimple;
  return (model, context, options) => {
    const forwardPayload = options?.onPayload;
    // Caller headers win nothing here: the Kilocode feature header is
    // merged last so attribution is always present.
    const headers = {
      ...options?.headers,
      ...resolveKilocodeAppHeaders(),
    };
    return delegate(model, context, {
      ...options,
      headers,
      onPayload: (payload) => {
        // Normalize reasoning fields before handing the payload back to
        // any caller-supplied observer.
        normalizeProxyReasoningPayload(payload, thinkingLevel);
        forwardPayload?.(payload);
      },
    });
  };
}
function isGemini31Model(modelId: string): boolean {
const normalized = modelId.toLowerCase();
return normalized.includes("gemini-3.1-pro") || normalized.includes("gemini-3.1-flash");
@@ -1118,12 +1158,22 @@ export function applyExtraParamsToAgent(
// and reject payloads containing it with "Invalid arguments passed to the
// model." Skip reasoning injection for these models.
// See: openclaw/openclaw#32039
const skipReasoningInjection = modelId === "auto" || isOpenRouterReasoningUnsupported(modelId);
const skipReasoningInjection = modelId === "auto" || isProxyReasoningUnsupported(modelId);
const openRouterThinkingLevel = skipReasoningInjection ? undefined : thinkingLevel;
agent.streamFn = createOpenRouterWrapper(agent.streamFn, openRouterThinkingLevel);
agent.streamFn = createOpenRouterSystemCacheWrapper(agent.streamFn);
}
if (provider === "kilocode") {
log.debug(`applying Kilocode feature header for ${provider}/${modelId}`);
// kilo/auto is a dynamic routing model — skip reasoning injection
// (same rationale as OpenRouter "auto"). See: openclaw/openclaw#24851
// Also skip for models known to reject reasoning.effort (e.g. x-ai/*).
const kilocodeThinkingLevel =
modelId === "kilo/auto" || isProxyReasoningUnsupported(modelId) ? undefined : thinkingLevel;
agent.streamFn = createKilocodeWrapper(agent.streamFn, kilocodeThinkingLevel);
}
if (provider === "amazon-bedrock" && !isAnthropicBedrockModel(modelId)) {
log.debug(`disabling prompt caching for non-Anthropic Bedrock model ${provider}/${modelId}`);
agent.streamFn = createBedrockNoCacheWrapper(agent.streamFn);