fix: don't inject reasoning: { effort: "none" } for OpenRouter when thinking is off

"off" is a truthy string, so the existing guard `if (thinkingLevel && ...)`
was always entering the injection block and sending `reasoning: { effort: "none" }`
to every OpenRouter request — even when thinking wasn't enabled. Models that
require reasoning (e.g. deepseek/deepseek-r1) reject this with:
  400 Reasoning is mandatory for this endpoint and cannot be disabled.

Fix: skip the reasoning injection entirely when thinkingLevel is "off".
The reasoning_effort flat-field cleanup still runs. Omitting the reasoning
field lets each model use its own default behavior.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
This commit is contained in:
Tim Jones
2026-02-23 23:07:50 +00:00
committed by Peter Steinberger
parent 69a541c3f0
commit 3e974dc93f
2 changed files with 75 additions and 16 deletions

View File

@@ -202,6 +202,58 @@ describe("applyExtraParamsToAgent", () => {
return calls[0]?.headers;
}
it("does not inject reasoning when thinkingLevel is off (default) for OpenRouter", () => {
  // Regression guard: "off" is a truthy string, so the previous code injected
  // reasoning: { effort: "none" } into every payload. Models that mandate
  // reasoning (e.g. deepseek/deepseek-r1) reject that with a 400 error, so the
  // payload must carry neither the nested nor the flat reasoning field.
  const captured: Record<string, unknown>[] = [];
  const streamStub: StreamFn = (_model, _context, options) => {
    const body: Record<string, unknown> = { model: "deepseek/deepseek-r1" };
    options?.onPayload?.(body);
    captured.push(body);
    return {} as ReturnType<StreamFn>;
  };
  const wrapped = { streamFn: streamStub };
  applyExtraParamsToAgent(wrapped, undefined, "openrouter", "deepseek/deepseek-r1", undefined, "off");
  const testModel = {
    api: "openai-completions",
    provider: "openrouter",
    id: "deepseek/deepseek-r1",
  } as Model<"openai-completions">;
  const ctx: Context = { messages: [] };
  void wrapped.streamFn?.(testModel, ctx, {});
  expect(captured).toHaveLength(1);
  expect(captured[0]).not.toHaveProperty("reasoning");
  expect(captured[0]).not.toHaveProperty("reasoning_effort");
});
it("injects reasoning.effort when thinkingLevel is non-off for OpenRouter", () => {
  // Complement to the "off" regression test: any enabled thinking level must
  // still be mapped into the nested reasoning.effort field on the payload.
  const captured: Record<string, unknown>[] = [];
  const streamStub: StreamFn = (_model, _context, options) => {
    const body: Record<string, unknown> = {};
    options?.onPayload?.(body);
    captured.push(body);
    return {} as ReturnType<StreamFn>;
  };
  const wrapped = { streamFn: streamStub };
  applyExtraParamsToAgent(wrapped, undefined, "openrouter", "openrouter/auto", undefined, "low");
  const testModel = {
    api: "openai-completions",
    provider: "openrouter",
    id: "openrouter/auto",
  } as Model<"openai-completions">;
  const ctx: Context = { messages: [] };
  void wrapped.streamFn?.(testModel, ctx, {});
  expect(captured).toHaveLength(1);
  expect(captured[0]?.reasoning).toEqual({ effort: "low" });
});
it("adds OpenRouter attribution headers to stream options", () => {
const { calls, agent } = createOptionsCaptureAgent();

View File

@@ -435,24 +435,31 @@ function createOpenRouterWrapper(
// only the nested one is sent.
delete payloadObj.reasoning_effort;
const existingReasoning = payloadObj.reasoning;
// When thinking is "off", do not inject reasoning at all.
// Some models (e.g. deepseek/deepseek-r1) require reasoning and reject
// { effort: "none" } with "Reasoning is mandatory for this endpoint and
// cannot be disabled." Omitting the field lets each model use its own
// default reasoning behavior.
if (thinkingLevel !== "off") {
const existingReasoning = payloadObj.reasoning;
// OpenRouter treats reasoning.effort and reasoning.max_tokens as
// alternative controls. If max_tokens is already present, do not
// inject effort and do not overwrite caller-supplied reasoning.
if (
existingReasoning &&
typeof existingReasoning === "object" &&
!Array.isArray(existingReasoning)
) {
const reasoningObj = existingReasoning as Record<string, unknown>;
if (!("max_tokens" in reasoningObj) && !("effort" in reasoningObj)) {
reasoningObj.effort = mapThinkingLevelToOpenRouterReasoningEffort(thinkingLevel);
// OpenRouter treats reasoning.effort and reasoning.max_tokens as
// alternative controls. If max_tokens is already present, do not
// inject effort and do not overwrite caller-supplied reasoning.
if (
existingReasoning &&
typeof existingReasoning === "object" &&
!Array.isArray(existingReasoning)
) {
const reasoningObj = existingReasoning as Record<string, unknown>;
if (!("max_tokens" in reasoningObj) && !("effort" in reasoningObj)) {
reasoningObj.effort = mapThinkingLevelToOpenRouterReasoningEffort(thinkingLevel);
}
} else if (!existingReasoning) {
payloadObj.reasoning = {
effort: mapThinkingLevelToOpenRouterReasoningEffort(thinkingLevel),
};
}
} else if (!existingReasoning) {
payloadObj.reasoning = {
effort: mapThinkingLevelToOpenRouterReasoningEffort(thinkingLevel),
};
}
}
onPayload?.(payload);