fix(providers): make all models available in kilocode provider (#32352)

* kilocode: dynamic model discovery, kilo/auto default, cooldown exemption

- Replace the hardcoded 9-model catalog with dynamic discovery from
  GET /api/gateway/models (Venice-like pattern with a static fallback;
  see the sketch after this list)
- Change the default model from anthropic/claude-opus-4.6 to kilo/auto
  (the smart-routing model)
- Add createKilocodeWrapper for X-KILOCODE-FEATURE header injection
  and reasoning.effort handling (skip for kilo/auto)
- Add kilocode to cooldown-exempt providers (proxy like OpenRouter)
- Keep sync buildKilocodeProvider for onboarding, add async
  buildKilocodeProviderWithDiscovery for implicit provider resolution
- Convert per-token gateway pricing to per-1M-token values for the cost fields
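
A minimal sketch of the discovery-plus-fallback and pricing-conversion shape described
above. The helper names, the { data: [...] } response shape, and the price fields are
illustrative assumptions, not the actual identifiers used in this change:

  // Hypothetical sketch: query the gateway catalog, fall back to a static list on any failure.
  type KilocodeModel = { id: string; inputCostPer1M?: number; outputCostPer1M?: number };

  const STATIC_KILOCODE_MODELS: KilocodeModel[] = [{ id: "kilo/auto" }];

  // Assumed conversion: the gateway reports USD per token; cost fields expect USD per 1M tokens.
  // Example: 0.000003 USD/token becomes 3 USD per 1M tokens.
  const toPerMillionTokens = (perTokenPrice: number): number => perTokenPrice * 1_000_000;

  async function discoverKilocodeModels(baseUrl: string, apiKey: string): Promise<KilocodeModel[]> {
    try {
      const res = await fetch(`${baseUrl}/api/gateway/models`, {
        headers: { Authorization: `Bearer ${apiKey}` },
      });
      if (!res.ok) return STATIC_KILOCODE_MODELS;
      const body = (await res.json()) as {
        data?: Array<{ id?: unknown; inputPrice?: number; outputPrice?: number }>;
      };
      const models = (body.data ?? [])
        .filter((m): m is { id: string; inputPrice?: number; outputPrice?: number } =>
          typeof m.id === "string",
        )
        .map((m) => ({
          id: m.id,
          inputCostPer1M: m.inputPrice !== undefined ? toPerMillionTokens(m.inputPrice) : undefined,
          outputCostPer1M: m.outputPrice !== undefined ? toPerMillionTokens(m.outputPrice) : undefined,
        }));
      return models.length > 0 ? models : STATIC_KILOCODE_MODELS;
    } catch {
      // Network or parse failure: keep the provider usable via the static catalog.
      return STATIC_KILOCODE_MODELS;
    }
  }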

* kilocode: skip reasoning injection for x-ai models, harden discovery loop

* fix(kilocode): keep valid discovered duplicates (openclaw#32352, thanks @pandemicsyn)

* refactor(proxy): normalize reasoning payload guards (openclaw#32352, thanks @pandemicsyn)

* chore(changelog): note kilocode hardening (openclaw#32352, thanks @pandemicsyn and @vincentkoc)

* chore(changelog): fix kilocode note format (openclaw#32352, thanks @pandemicsyn and @vincentkoc)

* test(kilocode): support auto-model override cases (openclaw#32352, thanks @pandemicsyn)

* Update CHANGELOG.md

---------

Co-authored-by: Vincent Koc <vincentkoc@ieee.org>
Author: Florian Hines
Date: 2026-03-07 10:14:06 -06:00
Committed by: GitHub
Parent: 786ec21b5a
Commit: 33e7394861
15 changed files with 832 additions and 168 deletions

View File

@@ -0,0 +1,182 @@
import type { StreamFn } from "@mariozechner/pi-agent-core";
import type { Context, Model } from "@mariozechner/pi-ai";
import { createAssistantMessageEventStream } from "@mariozechner/pi-ai";
import { afterEach, describe, expect, it } from "vitest";
import { captureEnv } from "../../test-utils/env.js";
import { applyExtraParamsToAgent } from "./extra-params.js";

type CapturedCall = {
  headers?: Record<string, string>;
  payload?: Record<string, unknown>;
};

function applyAndCapture(params: {
  provider: string;
  modelId: string;
  callerHeaders?: Record<string, string>;
}): CapturedCall {
  const captured: CapturedCall = {};
  const baseStreamFn: StreamFn = (_model, _context, options) => {
    captured.headers = options?.headers;
    options?.onPayload?.({});
    return createAssistantMessageEventStream();
  };
  const agent = { streamFn: baseStreamFn };
  applyExtraParamsToAgent(agent, undefined, params.provider, params.modelId);
  const model = {
    api: "openai-completions",
    provider: params.provider,
    id: params.modelId,
  } as Model<"openai-completions">;
  const context: Context = { messages: [] };
  void agent.streamFn?.(model, context, {
    headers: params.callerHeaders,
  });
  return captured;
}
describe("extra-params: Kilocode wrapper", () => {
const envSnapshot = captureEnv(["KILOCODE_FEATURE"]);
afterEach(() => {
envSnapshot.restore();
});
it("injects X-KILOCODE-FEATURE header with default value", () => {
delete process.env.KILOCODE_FEATURE;
const { headers } = applyAndCapture({
provider: "kilocode",
modelId: "anthropic/claude-sonnet-4",
});
expect(headers?.["X-KILOCODE-FEATURE"]).toBe("openclaw");
});
it("reads X-KILOCODE-FEATURE from KILOCODE_FEATURE env var", () => {
process.env.KILOCODE_FEATURE = "custom-feature";
const { headers } = applyAndCapture({
provider: "kilocode",
modelId: "anthropic/claude-sonnet-4",
});
expect(headers?.["X-KILOCODE-FEATURE"]).toBe("custom-feature");
});
it("cannot be overridden by caller headers", () => {
delete process.env.KILOCODE_FEATURE;
const { headers } = applyAndCapture({
provider: "kilocode",
modelId: "anthropic/claude-sonnet-4",
callerHeaders: { "X-KILOCODE-FEATURE": "should-be-overwritten" },
});
expect(headers?.["X-KILOCODE-FEATURE"]).toBe("openclaw");
});
it("does not inject header for non-kilocode providers", () => {
const { headers } = applyAndCapture({
provider: "openrouter",
modelId: "anthropic/claude-sonnet-4",
});
expect(headers?.["X-KILOCODE-FEATURE"]).toBeUndefined();
});
});
describe("extra-params: Kilocode kilo/auto reasoning", () => {
it("does not inject reasoning.effort for kilo/auto", () => {
let capturedPayload: Record<string, unknown> | undefined;
const baseStreamFn: StreamFn = (_model, _context, options) => {
const payload: Record<string, unknown> = { reasoning_effort: "high" };
options?.onPayload?.(payload);
capturedPayload = payload;
return createAssistantMessageEventStream();
};
const agent = { streamFn: baseStreamFn };
// Pass thinking level explicitly (6th parameter) to trigger reasoning injection
applyExtraParamsToAgent(agent, undefined, "kilocode", "kilo/auto", undefined, "high");
const model = {
api: "openai-completions",
provider: "kilocode",
id: "kilo/auto",
} as Model<"openai-completions">;
const context: Context = { messages: [] };
void agent.streamFn?.(model, context, {});
// kilo/auto should not have reasoning injected
expect(capturedPayload?.reasoning).toBeUndefined();
expect(capturedPayload).not.toHaveProperty("reasoning_effort");
});
it("injects reasoning.effort for non-auto kilocode models", () => {
let capturedPayload: Record<string, unknown> | undefined;
const baseStreamFn: StreamFn = (_model, _context, options) => {
const payload: Record<string, unknown> = {};
options?.onPayload?.(payload);
capturedPayload = payload;
return createAssistantMessageEventStream();
};
const agent = { streamFn: baseStreamFn };
applyExtraParamsToAgent(
agent,
undefined,
"kilocode",
"anthropic/claude-sonnet-4",
undefined,
"high",
);
const model = {
api: "openai-completions",
provider: "kilocode",
id: "anthropic/claude-sonnet-4",
} as Model<"openai-completions">;
const context: Context = { messages: [] };
void agent.streamFn?.(model, context, {});
// Non-auto models should have reasoning injected
expect(capturedPayload?.reasoning).toEqual({ effort: "high" });
});
it("does not inject reasoning.effort for x-ai models", () => {
let capturedPayload: Record<string, unknown> | undefined;
const baseStreamFn: StreamFn = (_model, _context, options) => {
const payload: Record<string, unknown> = { reasoning_effort: "high" };
options?.onPayload?.(payload);
capturedPayload = payload;
return createAssistantMessageEventStream();
};
const agent = { streamFn: baseStreamFn };
applyExtraParamsToAgent(agent, undefined, "kilocode", "x-ai/grok-3", undefined, "high");
const model = {
api: "openai-completions",
provider: "kilocode",
id: "x-ai/grok-3",
} as Model<"openai-completions">;
const context: Context = { messages: [] };
void agent.streamFn?.(model, context, {});
// x-ai models reject reasoning.effort — should be skipped
expect(capturedPayload?.reasoning).toBeUndefined();
expect(capturedPayload).not.toHaveProperty("reasoning_effort");
});
});

View File

@@ -9,6 +9,15 @@ const OPENROUTER_APP_HEADERS: Record<string, string> = {
"HTTP-Referer": "https://openclaw.ai",
"X-Title": "OpenClaw",
};
const KILOCODE_FEATURE_HEADER = "X-KILOCODE-FEATURE";
const KILOCODE_FEATURE_DEFAULT = "openclaw";
const KILOCODE_FEATURE_ENV_VAR = "KILOCODE_FEATURE";
function resolveKilocodeAppHeaders(): Record<string, string> {
const feature = process.env[KILOCODE_FEATURE_ENV_VAR]?.trim() || KILOCODE_FEATURE_DEFAULT;
return { [KILOCODE_FEATURE_HEADER]: feature };
}
const ANTHROPIC_CONTEXT_1M_BETA = "context-1m-2025-08-07";
const ANTHROPIC_1M_MODEL_PREFIXES = ["claude-opus-4", "claude-sonnet-4"] as const;
// NOTE: We only force `store=true` for *direct* OpenAI Responses.
@@ -846,6 +855,45 @@ function createKimiCodingAnthropicToolSchemaWrapper(baseStreamFn: StreamFn | und
  * Create a streamFn wrapper that adds OpenRouter app attribution headers
  * and injects reasoning.effort based on the configured thinking level.
  */
+function normalizeProxyReasoningPayload(payload: unknown, thinkingLevel?: ThinkLevel): void {
+  if (!payload || typeof payload !== "object") {
+    return;
+  }
+  const payloadObj = payload as Record<string, unknown>;
+  // pi-ai may inject a top-level reasoning_effort (OpenAI flat format).
+  // OpenRouter-compatible proxy gateways expect the nested reasoning.effort
+  // shape instead, and some models reject the flat field outright.
+  delete payloadObj.reasoning_effort;
+  // When thinking is "off", or provider/model guards disable injection,
+  // leave reasoning unset after normalizing away the legacy flat field.
+  if (!thinkingLevel || thinkingLevel === "off") {
+    return;
+  }
+  const existingReasoning = payloadObj.reasoning;
+  // OpenRouter treats reasoning.effort and reasoning.max_tokens as
+  // alternative controls. If max_tokens is already present, do not inject
+  // effort and do not overwrite caller-supplied reasoning.
+  if (
+    existingReasoning &&
+    typeof existingReasoning === "object" &&
+    !Array.isArray(existingReasoning)
+  ) {
+    const reasoningObj = existingReasoning as Record<string, unknown>;
+    if (!("max_tokens" in reasoningObj) && !("effort" in reasoningObj)) {
+      reasoningObj.effort = mapThinkingLevelToOpenRouterReasoningEffort(thinkingLevel);
+    }
+  } else if (!existingReasoning) {
+    payloadObj.reasoning = {
+      effort: mapThinkingLevelToOpenRouterReasoningEffort(thinkingLevel),
+    };
+  }
+}
+
 function createOpenRouterWrapper(
   baseStreamFn: StreamFn | undefined,
   thinkingLevel?: ThinkLevel,
@@ -860,42 +908,7 @@ function createOpenRouterWrapper(
         ...options?.headers,
       },
       onPayload: (payload) => {
-        if (thinkingLevel && payload && typeof payload === "object") {
-          const payloadObj = payload as Record<string, unknown>;
-          // pi-ai may inject a top-level reasoning_effort (OpenAI flat format).
-          // OpenRouter expects the nested reasoning.effort format instead, and
-          // rejects payloads containing both fields. Remove the flat field so
-          // only the nested one is sent.
-          delete payloadObj.reasoning_effort;
-          // When thinking is "off", do not inject reasoning at all.
-          // Some models (e.g. deepseek/deepseek-r1) require reasoning and reject
-          // { effort: "none" } with "Reasoning is mandatory for this endpoint and
-          // cannot be disabled." Omitting the field lets each model use its own
-          // default reasoning behavior.
-          if (thinkingLevel !== "off") {
-            const existingReasoning = payloadObj.reasoning;
-            // OpenRouter treats reasoning.effort and reasoning.max_tokens as
-            // alternative controls. If max_tokens is already present, do not
-            // inject effort and do not overwrite caller-supplied reasoning.
-            if (
-              existingReasoning &&
-              typeof existingReasoning === "object" &&
-              !Array.isArray(existingReasoning)
-            ) {
-              const reasoningObj = existingReasoning as Record<string, unknown>;
-              if (!("max_tokens" in reasoningObj) && !("effort" in reasoningObj)) {
-                reasoningObj.effort = mapThinkingLevelToOpenRouterReasoningEffort(thinkingLevel);
-              }
-            } else if (!existingReasoning) {
-              payloadObj.reasoning = {
-                effort: mapThinkingLevelToOpenRouterReasoningEffort(thinkingLevel),
-              };
-            }
-          }
-        }
+        normalizeProxyReasoningPayload(payload, thinkingLevel);
         onPayload?.(payload);
       },
     });
@@ -903,14 +916,41 @@ function createOpenRouterWrapper(
 }
 
 /**
- * Models on OpenRouter that do not support the `reasoning.effort` parameter.
- * Injecting it causes "Invalid arguments passed to the model" errors.
+ * Models on OpenRouter-style proxy providers that reject `reasoning.effort`.
  */
-function isOpenRouterReasoningUnsupported(modelId: string): boolean {
+function isProxyReasoningUnsupported(modelId: string): boolean {
   const id = modelId.toLowerCase();
   return id.startsWith("x-ai/");
 }
 
+/**
+ * Create a streamFn wrapper that adds the Kilocode feature attribution header
+ * and injects reasoning.effort based on the configured thinking level.
+ *
+ * The Kilocode provider gateway manages provider-specific quirks (e.g. cache
+ * control) server-side, so we only handle header injection and reasoning here.
+ */
+function createKilocodeWrapper(
+  baseStreamFn: StreamFn | undefined,
+  thinkingLevel?: ThinkLevel,
+): StreamFn {
+  const underlying = baseStreamFn ?? streamSimple;
+  return (model, context, options) => {
+    const onPayload = options?.onPayload;
+    return underlying(model, context, {
+      ...options,
+      headers: {
+        ...options?.headers,
+        ...resolveKilocodeAppHeaders(),
+      },
+      onPayload: (payload) => {
+        normalizeProxyReasoningPayload(payload, thinkingLevel);
+        onPayload?.(payload);
+      },
+    });
+  };
+}
+
 function isGemini31Model(modelId: string): boolean {
   const normalized = modelId.toLowerCase();
   return normalized.includes("gemini-3.1-pro") || normalized.includes("gemini-3.1-flash");
@@ -1118,12 +1158,22 @@ export function applyExtraParamsToAgent(
     // and reject payloads containing it with "Invalid arguments passed to the
     // model." Skip reasoning injection for these models.
     // See: openclaw/openclaw#32039
-    const skipReasoningInjection = modelId === "auto" || isOpenRouterReasoningUnsupported(modelId);
+    const skipReasoningInjection = modelId === "auto" || isProxyReasoningUnsupported(modelId);
     const openRouterThinkingLevel = skipReasoningInjection ? undefined : thinkingLevel;
     agent.streamFn = createOpenRouterWrapper(agent.streamFn, openRouterThinkingLevel);
     agent.streamFn = createOpenRouterSystemCacheWrapper(agent.streamFn);
   }
+
+  if (provider === "kilocode") {
+    log.debug(`applying Kilocode feature header for ${provider}/${modelId}`);
+    // kilo/auto is a dynamic routing model; skip reasoning injection
+    // (same rationale as OpenRouter "auto"). See: openclaw/openclaw#24851
+    // Also skip for models known to reject reasoning.effort (e.g. x-ai/*).
+    const kilocodeThinkingLevel =
+      modelId === "kilo/auto" || isProxyReasoningUnsupported(modelId) ? undefined : thinkingLevel;
+    agent.streamFn = createKilocodeWrapper(agent.streamFn, kilocodeThinkingLevel);
+  }
+
   if (provider === "amazon-bedrock" && !isAnthropicBedrockModel(modelId)) {
     log.debug(`disabling prompt caching for non-Anthropic Bedrock model ${provider}/${modelId}`);
     agent.streamFn = createBedrockNoCacheWrapper(agent.streamFn);