mirror of
https://github.com/openclaw/openclaw.git
synced 2026-05-10 15:24:58 +00:00
fix(providers): make all models available in kilocode provider (#32352)
* kilocode: dynamic model discovery, kilo/auto default, cooldown exemption
  - Replace 9-model hardcoded catalog with dynamic discovery from GET /api/gateway/models (Venice-like pattern with static fallback)
  - Default model changed from anthropic/claude-opus-4.6 to kilo/auto (smart routing model)
  - Add createKilocodeWrapper for X-KILOCODE-FEATURE header injection and reasoning.effort handling (skip for kilo/auto)
  - Add kilocode to cooldown-exempt providers (proxy like OpenRouter)
  - Keep sync buildKilocodeProvider for onboarding, add async buildKilocodeProviderWithDiscovery for implicit provider resolution
  - Per-token gateway pricing converted to per-1M-token for cost fields
* kilocode: skip reasoning injection for x-ai models, harden discovery loop
* fix(kilocode): keep valid discovered duplicates (openclaw#32352, thanks @pandemicsyn)
* refactor(proxy): normalize reasoning payload guards (openclaw#32352, thanks @pandemicsyn)
* chore(changelog): note kilocode hardening (openclaw#32352, thanks @pandemicsyn and @vincentkoc)
* chore(changelog): fix kilocode note format (openclaw#32352, thanks @pandemicsyn and @vincentkoc)
* test(kilocode): support auto-model override cases (openclaw#32352, thanks @pandemicsyn)
* Update CHANGELOG.md

---------

Co-authored-by: Vincent Koc <vincentkoc@ieee.org>
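Note: the discovery code itself is not in the hunks shown below; it lives in the provider build path (buildKilocodeProviderWithDiscovery). The following is a rough TypeScript sketch of the pattern the message describes: fetch the gateway catalog, fall back to a static list on failure, and convert per-token prices to per-1M-token cost fields. The response shape, field names, and fallback list here are assumptions for illustration, not the actual OpenClaw implementation.

```ts
// Hypothetical sketch only: the response fields (data, id, pricing.input/output)
// and the fallback catalog are assumed, not taken from the real provider code.
type KilocodeModel = { id: string; inputCostPer1M?: number; outputCostPer1M?: number };

const STATIC_FALLBACK: KilocodeModel[] = [{ id: "kilo/auto" }];

async function discoverKilocodeModels(baseUrl: string, apiKey: string): Promise<KilocodeModel[]> {
	try {
		const res = await fetch(`${baseUrl}/api/gateway/models`, {
			headers: { Authorization: `Bearer ${apiKey}` },
		});
		if (!res.ok) return STATIC_FALLBACK;
		const body = (await res.json()) as {
			data?: Array<{ id?: unknown; pricing?: { input?: unknown; output?: unknown } }>;
		};
		const models: KilocodeModel[] = [];
		for (const entry of body.data ?? []) {
			// Hardened loop: skip malformed entries instead of aborting discovery.
			if (!entry || typeof entry.id !== "string" || entry.id.length === 0) continue;
			const pricing = entry.pricing ?? {};
			models.push({
				id: entry.id,
				// Gateway reports per-token prices; cost fields want per-1M-token values.
				inputCostPer1M: typeof pricing.input === "number" ? pricing.input * 1_000_000 : undefined,
				outputCostPer1M: typeof pricing.output === "number" ? pricing.output * 1_000_000 : undefined,
			});
		}
		return models.length > 0 ? models : STATIC_FALLBACK;
	} catch {
		return STATIC_FALLBACK; // network or parse failure: keep the provider usable
	}
}
```

Per the follow-up commits in this PR, valid duplicate entries from discovery are kept rather than deduplicated, and malformed entries are skipped rather than failing the whole loop.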
182 src/agents/pi-embedded-runner/extra-params.kilocode.test.ts (Normal file)
@@ -0,0 +1,182 @@
+import type { StreamFn } from "@mariozechner/pi-agent-core";
+import type { Context, Model } from "@mariozechner/pi-ai";
+import { createAssistantMessageEventStream } from "@mariozechner/pi-ai";
+import { afterEach, describe, expect, it } from "vitest";
+import { captureEnv } from "../../test-utils/env.js";
+import { applyExtraParamsToAgent } from "./extra-params.js";
+
+type CapturedCall = {
+	headers?: Record<string, string>;
+	payload?: Record<string, unknown>;
+};
+
+function applyAndCapture(params: {
+	provider: string;
+	modelId: string;
+	callerHeaders?: Record<string, string>;
+}): CapturedCall {
+	const captured: CapturedCall = {};
+
+	const baseStreamFn: StreamFn = (_model, _context, options) => {
+		captured.headers = options?.headers;
+		options?.onPayload?.({});
+		return createAssistantMessageEventStream();
+	};
+	const agent = { streamFn: baseStreamFn };
+
+	applyExtraParamsToAgent(agent, undefined, params.provider, params.modelId);
+
+	const model = {
+		api: "openai-completions",
+		provider: params.provider,
+		id: params.modelId,
+	} as Model<"openai-completions">;
+	const context: Context = { messages: [] };
+
+	void agent.streamFn?.(model, context, {
+		headers: params.callerHeaders,
+	});
+
+	return captured;
+}
+
+describe("extra-params: Kilocode wrapper", () => {
+	const envSnapshot = captureEnv(["KILOCODE_FEATURE"]);
+
+	afterEach(() => {
+		envSnapshot.restore();
+	});
+
+	it("injects X-KILOCODE-FEATURE header with default value", () => {
+		delete process.env.KILOCODE_FEATURE;
+
+		const { headers } = applyAndCapture({
+			provider: "kilocode",
+			modelId: "anthropic/claude-sonnet-4",
+		});
+
+		expect(headers?.["X-KILOCODE-FEATURE"]).toBe("openclaw");
+	});
+
+	it("reads X-KILOCODE-FEATURE from KILOCODE_FEATURE env var", () => {
+		process.env.KILOCODE_FEATURE = "custom-feature";
+
+		const { headers } = applyAndCapture({
+			provider: "kilocode",
+			modelId: "anthropic/claude-sonnet-4",
+		});
+
+		expect(headers?.["X-KILOCODE-FEATURE"]).toBe("custom-feature");
+	});
+
+	it("cannot be overridden by caller headers", () => {
+		delete process.env.KILOCODE_FEATURE;
+
+		const { headers } = applyAndCapture({
+			provider: "kilocode",
+			modelId: "anthropic/claude-sonnet-4",
+			callerHeaders: { "X-KILOCODE-FEATURE": "should-be-overwritten" },
+		});
+
+		expect(headers?.["X-KILOCODE-FEATURE"]).toBe("openclaw");
+	});
+
+	it("does not inject header for non-kilocode providers", () => {
+		const { headers } = applyAndCapture({
+			provider: "openrouter",
+			modelId: "anthropic/claude-sonnet-4",
+		});
+
+		expect(headers?.["X-KILOCODE-FEATURE"]).toBeUndefined();
+	});
+});
+
+describe("extra-params: Kilocode kilo/auto reasoning", () => {
+	it("does not inject reasoning.effort for kilo/auto", () => {
+		let capturedPayload: Record<string, unknown> | undefined;
+
+		const baseStreamFn: StreamFn = (_model, _context, options) => {
+			const payload: Record<string, unknown> = { reasoning_effort: "high" };
+			options?.onPayload?.(payload);
+			capturedPayload = payload;
+			return createAssistantMessageEventStream();
+		};
+		const agent = { streamFn: baseStreamFn };
+
+		// Pass thinking level explicitly (6th parameter) to trigger reasoning injection
+		applyExtraParamsToAgent(agent, undefined, "kilocode", "kilo/auto", undefined, "high");
+
+		const model = {
+			api: "openai-completions",
+			provider: "kilocode",
+			id: "kilo/auto",
+		} as Model<"openai-completions">;
+		const context: Context = { messages: [] };
+
+		void agent.streamFn?.(model, context, {});
+
+		// kilo/auto should not have reasoning injected
+		expect(capturedPayload?.reasoning).toBeUndefined();
+		expect(capturedPayload).not.toHaveProperty("reasoning_effort");
+	});
+
+	it("injects reasoning.effort for non-auto kilocode models", () => {
+		let capturedPayload: Record<string, unknown> | undefined;
+
+		const baseStreamFn: StreamFn = (_model, _context, options) => {
+			const payload: Record<string, unknown> = {};
+			options?.onPayload?.(payload);
+			capturedPayload = payload;
+			return createAssistantMessageEventStream();
+		};
+		const agent = { streamFn: baseStreamFn };
+
+		applyExtraParamsToAgent(
+			agent,
+			undefined,
+			"kilocode",
+			"anthropic/claude-sonnet-4",
+			undefined,
+			"high",
+		);
+
+		const model = {
+			api: "openai-completions",
+			provider: "kilocode",
+			id: "anthropic/claude-sonnet-4",
+		} as Model<"openai-completions">;
+		const context: Context = { messages: [] };
+
+		void agent.streamFn?.(model, context, {});
+
+		// Non-auto models should have reasoning injected
+		expect(capturedPayload?.reasoning).toEqual({ effort: "high" });
+	});
+
+	it("does not inject reasoning.effort for x-ai models", () => {
+		let capturedPayload: Record<string, unknown> | undefined;
+
+		const baseStreamFn: StreamFn = (_model, _context, options) => {
+			const payload: Record<string, unknown> = { reasoning_effort: "high" };
+			options?.onPayload?.(payload);
+			capturedPayload = payload;
+			return createAssistantMessageEventStream();
+		};
+		const agent = { streamFn: baseStreamFn };
+
+		applyExtraParamsToAgent(agent, undefined, "kilocode", "x-ai/grok-3", undefined, "high");
+
+		const model = {
+			api: "openai-completions",
+			provider: "kilocode",
+			id: "x-ai/grok-3",
+		} as Model<"openai-completions">;
+		const context: Context = { messages: [] };
+
+		void agent.streamFn?.(model, context, {});
+
+		// x-ai models reject reasoning.effort — should be skipped
+		expect(capturedPayload?.reasoning).toBeUndefined();
+		expect(capturedPayload).not.toHaveProperty("reasoning_effort");
+	});
+});
@@ -9,6 +9,15 @@ const OPENROUTER_APP_HEADERS: Record<string, string> = {
 	"HTTP-Referer": "https://openclaw.ai",
 	"X-Title": "OpenClaw",
 };
+const KILOCODE_FEATURE_HEADER = "X-KILOCODE-FEATURE";
+const KILOCODE_FEATURE_DEFAULT = "openclaw";
+const KILOCODE_FEATURE_ENV_VAR = "KILOCODE_FEATURE";
+
+function resolveKilocodeAppHeaders(): Record<string, string> {
+	const feature = process.env[KILOCODE_FEATURE_ENV_VAR]?.trim() || KILOCODE_FEATURE_DEFAULT;
+	return { [KILOCODE_FEATURE_HEADER]: feature };
+}
+
 const ANTHROPIC_CONTEXT_1M_BETA = "context-1m-2025-08-07";
 const ANTHROPIC_1M_MODEL_PREFIXES = ["claude-opus-4", "claude-sonnet-4"] as const;
 // NOTE: We only force `store=true` for *direct* OpenAI Responses.
@@ -846,6 +855,45 @@ function createKimiCodingAnthropicToolSchemaWrapper(baseStreamFn: StreamFn | und
  * Create a streamFn wrapper that adds OpenRouter app attribution headers
  * and injects reasoning.effort based on the configured thinking level.
  */
+function normalizeProxyReasoningPayload(payload: unknown, thinkingLevel?: ThinkLevel): void {
+	if (!payload || typeof payload !== "object") {
+		return;
+	}
+
+	const payloadObj = payload as Record<string, unknown>;
+
+	// pi-ai may inject a top-level reasoning_effort (OpenAI flat format).
+	// OpenRouter-compatible proxy gateways expect the nested reasoning.effort
+	// shape instead, and some models reject the flat field outright.
+	delete payloadObj.reasoning_effort;
+
+	// When thinking is "off", or provider/model guards disable injection,
+	// leave reasoning unset after normalizing away the legacy flat field.
+	if (!thinkingLevel || thinkingLevel === "off") {
+		return;
+	}
+
+	const existingReasoning = payloadObj.reasoning;
+
+	// OpenRouter treats reasoning.effort and reasoning.max_tokens as
+	// alternative controls. If max_tokens is already present, do not inject
+	// effort and do not overwrite caller-supplied reasoning.
+	if (
+		existingReasoning &&
+		typeof existingReasoning === "object" &&
+		!Array.isArray(existingReasoning)
+	) {
+		const reasoningObj = existingReasoning as Record<string, unknown>;
+		if (!("max_tokens" in reasoningObj) && !("effort" in reasoningObj)) {
+			reasoningObj.effort = mapThinkingLevelToOpenRouterReasoningEffort(thinkingLevel);
+		}
+	} else if (!existingReasoning) {
+		payloadObj.reasoning = {
+			effort: mapThinkingLevelToOpenRouterReasoningEffort(thinkingLevel),
+		};
+	}
+}
+
 function createOpenRouterWrapper(
 	baseStreamFn: StreamFn | undefined,
 	thinkingLevel?: ThinkLevel,
@@ -860,42 +908,7 @@ function createOpenRouterWrapper(
 				...options?.headers,
 			},
 			onPayload: (payload) => {
-				if (thinkingLevel && payload && typeof payload === "object") {
-					const payloadObj = payload as Record<string, unknown>;
-
-					// pi-ai may inject a top-level reasoning_effort (OpenAI flat format).
-					// OpenRouter expects the nested reasoning.effort format instead, and
-					// rejects payloads containing both fields. Remove the flat field so
-					// only the nested one is sent.
-					delete payloadObj.reasoning_effort;
-
-					// When thinking is "off", do not inject reasoning at all.
-					// Some models (e.g. deepseek/deepseek-r1) require reasoning and reject
-					// { effort: "none" } with "Reasoning is mandatory for this endpoint and
-					// cannot be disabled." Omitting the field lets each model use its own
-					// default reasoning behavior.
-					if (thinkingLevel !== "off") {
-						const existingReasoning = payloadObj.reasoning;
-
-						// OpenRouter treats reasoning.effort and reasoning.max_tokens as
-						// alternative controls. If max_tokens is already present, do not
-						// inject effort and do not overwrite caller-supplied reasoning.
-						if (
-							existingReasoning &&
-							typeof existingReasoning === "object" &&
-							!Array.isArray(existingReasoning)
-						) {
-							const reasoningObj = existingReasoning as Record<string, unknown>;
-							if (!("max_tokens" in reasoningObj) && !("effort" in reasoningObj)) {
-								reasoningObj.effort = mapThinkingLevelToOpenRouterReasoningEffort(thinkingLevel);
-							}
-						} else if (!existingReasoning) {
-							payloadObj.reasoning = {
-								effort: mapThinkingLevelToOpenRouterReasoningEffort(thinkingLevel),
-							};
-						}
-					}
-				}
+				normalizeProxyReasoningPayload(payload, thinkingLevel);
 				onPayload?.(payload);
 			},
 		});
@@ -903,14 +916,41 @@ function createOpenRouterWrapper(
 }
 
 /**
- * Models on OpenRouter that do not support the `reasoning.effort` parameter.
- * Injecting it causes "Invalid arguments passed to the model" errors.
+ * Models on OpenRouter-style proxy providers that reject `reasoning.effort`.
  */
-function isOpenRouterReasoningUnsupported(modelId: string): boolean {
+function isProxyReasoningUnsupported(modelId: string): boolean {
 	const id = modelId.toLowerCase();
 	return id.startsWith("x-ai/");
 }
 
+/**
+ * Create a streamFn wrapper that adds the Kilocode feature attribution header
+ * and injects reasoning.effort based on the configured thinking level.
+ *
+ * The Kilocode provider gateway manages provider-specific quirks (e.g. cache
+ * control) server-side, so we only handle header injection and reasoning here.
+ */
+function createKilocodeWrapper(
+	baseStreamFn: StreamFn | undefined,
+	thinkingLevel?: ThinkLevel,
+): StreamFn {
+	const underlying = baseStreamFn ?? streamSimple;
+	return (model, context, options) => {
+		const onPayload = options?.onPayload;
+		return underlying(model, context, {
+			...options,
+			headers: {
+				...options?.headers,
+				...resolveKilocodeAppHeaders(),
+			},
+			onPayload: (payload) => {
+				normalizeProxyReasoningPayload(payload, thinkingLevel);
+				onPayload?.(payload);
+			},
+		});
+	};
+}
+
 function isGemini31Model(modelId: string): boolean {
 	const normalized = modelId.toLowerCase();
 	return normalized.includes("gemini-3.1-pro") || normalized.includes("gemini-3.1-flash");
@@ -1118,12 +1158,22 @@ export function applyExtraParamsToAgent(
 		// and reject payloads containing it with "Invalid arguments passed to the
 		// model." Skip reasoning injection for these models.
 		// See: openclaw/openclaw#32039
-		const skipReasoningInjection = modelId === "auto" || isOpenRouterReasoningUnsupported(modelId);
+		const skipReasoningInjection = modelId === "auto" || isProxyReasoningUnsupported(modelId);
 		const openRouterThinkingLevel = skipReasoningInjection ? undefined : thinkingLevel;
 		agent.streamFn = createOpenRouterWrapper(agent.streamFn, openRouterThinkingLevel);
 		agent.streamFn = createOpenRouterSystemCacheWrapper(agent.streamFn);
 	}
 
+	if (provider === "kilocode") {
+		log.debug(`applying Kilocode feature header for ${provider}/${modelId}`);
+		// kilo/auto is a dynamic routing model — skip reasoning injection
+		// (same rationale as OpenRouter "auto"). See: openclaw/openclaw#24851
+		// Also skip for models known to reject reasoning.effort (e.g. x-ai/*).
+		const kilocodeThinkingLevel =
+			modelId === "kilo/auto" || isProxyReasoningUnsupported(modelId) ? undefined : thinkingLevel;
+		agent.streamFn = createKilocodeWrapper(agent.streamFn, kilocodeThinkingLevel);
+	}
+
 	if (provider === "amazon-bedrock" && !isAnthropicBedrockModel(modelId)) {
 		log.debug(`disabling prompt caching for non-Anthropic Bedrock model ${provider}/${modelId}`);
 		agent.streamFn = createBedrockNoCacheWrapper(agent.streamFn);