fix(ollama): unify context window handling across discovery, merge, and OpenAI-compat transport (#29205)

* fix(ollama): inject num_ctx for OpenAI-compatible transport

* fix(ollama): discover per-model context and preserve higher limits

* fix(agents): prefer matching provider model for fallback limits

* fix(types): require numeric token limits in provider model merge

* fix(types): accept unknown payload in ollama num_ctx wrapper

* fix(types): simplify ollama settled-result extraction

* config(models): add provider flag for Ollama OpenAI num_ctx injection

* config(schema): allow provider num_ctx injection flag

* config(labels): label provider num_ctx injection flag

* config(help): document provider num_ctx injection flag

* agents(ollama): gate OpenAI num_ctx injection with provider config

* tests(ollama): cover provider num_ctx injection flag behavior

* docs(config): list provider num_ctx injection option

* docs(ollama): document OpenAI num_ctx injection toggle

* docs(config): clarify merge token-limit precedence

* config(help): note merge uses higher model token limits

* fix(ollama): cap /api/show discovery concurrency

* fix(ollama): restrict num_ctx injection to OpenAI compat

* tests(ollama): cover ipv6 and compat num_ctx gating

* fix(ollama): detect remote compat endpoints for ollama-labeled providers

* fix(ollama): cap per-model /api/show lookups to bound discovery load
This commit is contained in:
Vincent Koc
2026-02-27 17:20:47 -08:00
committed by GitHub
parent 70a4f25ab1
commit f16ecd1dac
14 changed files with 582 additions and 21 deletions

View File

@@ -171,6 +171,35 @@ describe("resolveModel", () => {
expect(result.model?.id).toBe("missing-model");
});
it("prefers matching configured model metadata for fallback token limits", () => {
// Two models are configured with different limits; the fallback built for
// "model-b" must take contextWindow/maxTokens from the entry whose id
// matches, not from the first list entry (model-a).
const cfg = {
models: {
providers: {
custom: {
baseUrl: "http://localhost:9000",
models: [
{
...makeModel("model-a"),
contextWindow: 4096,
maxTokens: 1024,
},
{
...makeModel("model-b"),
contextWindow: 262144,
maxTokens: 32768,
},
],
},
},
},
} as OpenClawConfig;
const result = resolveModel("custom", "model-b", "/tmp/agent", cfg);
expect(result.model?.contextWindow).toBe(262144);
expect(result.model?.maxTokens).toBe(32768);
});
it("builds an openai-codex fallback for gpt-5.3-codex", () => {
mockOpenAICodexTemplateModel();

View File

@@ -96,6 +96,7 @@ export function resolveModel(
}
const providerCfg = providers[provider];
if (providerCfg || modelId.startsWith("mock-")) {
const configuredModel = providerCfg?.models?.find((candidate) => candidate.id === modelId);
const fallbackModel: Model<Api> = normalizeModelCompat({
id: modelId,
name: modelId,
@@ -105,8 +106,14 @@ export function resolveModel(
reasoning: false,
input: ["text"],
cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
contextWindow: providerCfg?.models?.[0]?.contextWindow ?? DEFAULT_CONTEXT_TOKENS,
maxTokens: providerCfg?.models?.[0]?.maxTokens ?? DEFAULT_CONTEXT_TOKENS,
contextWindow:
configuredModel?.contextWindow ??
providerCfg?.models?.[0]?.contextWindow ??
DEFAULT_CONTEXT_TOKENS,
maxTokens:
configuredModel?.maxTokens ??
providerCfg?.models?.[0]?.maxTokens ??
DEFAULT_CONTEXT_TOKENS,
} as Model<Api>);
return { model: fallbackModel, authStorage, modelRegistry };
}

View File

@@ -1,9 +1,13 @@
import { describe, expect, it, vi } from "vitest";
import type { OpenClawConfig } from "../../../config/config.js";
import {
isOllamaCompatProvider,
resolveAttemptFsWorkspaceOnly,
resolveOllamaCompatNumCtxEnabled,
resolvePromptBuildHookResult,
resolvePromptModeForSession,
shouldInjectOllamaCompatNumCtx,
wrapOllamaCompatNumCtx,
wrapStreamFnTrimToolCallNames,
} from "./attempt.js";
@@ -174,3 +178,159 @@ describe("wrapStreamFnTrimToolCallNames", () => {
expect(baseFn).toHaveBeenCalledTimes(1);
});
});
describe("isOllamaCompatProvider", () => {
  // Every case uses the openai-completions adapter; only the provider id and
  // base URL vary, so a small builder keeps each assertion on one line.
  const classify = (provider: string, baseUrl: string) =>
    isOllamaCompatProvider({ provider, api: "openai-completions", baseUrl });

  it("detects native ollama provider id", () => {
    expect(classify("ollama", "https://example.com/v1")).toBe(true);
  });
  it("detects localhost Ollama OpenAI-compatible endpoint", () => {
    expect(classify("custom", "http://127.0.0.1:11434/v1")).toBe(true);
  });
  it("does not misclassify non-local OpenAI-compatible providers", () => {
    expect(classify("custom", "https://api.openrouter.ai/v1")).toBe(false);
  });
  it("detects remote Ollama-compatible endpoint when provider id hints ollama", () => {
    expect(classify("my-ollama", "http://ollama-host:11434/v1")).toBe(true);
  });
  it("detects IPv6 loopback Ollama OpenAI-compatible endpoint", () => {
    expect(classify("custom", "http://[::1]:11434/v1")).toBe(true);
  });
  it("does not classify arbitrary remote hosts on 11434 without ollama provider hint", () => {
    expect(classify("custom", "http://example.com:11434/v1")).toBe(false);
  });
});
describe("wrapOllamaCompatNumCtx", () => {
  it("injects num_ctx and preserves downstream onPayload hooks", () => {
    let payloadSeen: Record<string, unknown> | undefined;
    // Fake stream fn: pushes one payload through onPayload and records the
    // (possibly mutated) object so the assertion can inspect it.
    const baseFn = vi.fn((_model, _context, options) => {
      const payload: Record<string, unknown> = { options: { temperature: 0.1 } };
      options?.onPayload?.(payload);
      payloadSeen = payload;
      return {} as never;
    });
    const downstream = vi.fn();
    const wrapped = wrapOllamaCompatNumCtx(baseFn as never, 202752);
    void wrapped({} as never, {} as never, { onPayload: downstream } as never);
    expect(baseFn).toHaveBeenCalledTimes(1);
    const injected = payloadSeen?.options as Record<string, unknown> | undefined;
    expect(injected?.num_ctx).toBe(202752);
    // The caller-supplied hook must still fire exactly once.
    expect(downstream).toHaveBeenCalledTimes(1);
  });
});
describe("resolveOllamaCompatNumCtxEnabled", () => {
// The flag defaults to enabled: a missing config, a missing provider entry,
// and an unset flag must all resolve to true.
it("defaults to true when config is missing", () => {
expect(resolveOllamaCompatNumCtxEnabled({ providerId: "ollama" })).toBe(true);
});
it("defaults to true when provider config is missing", () => {
expect(
resolveOllamaCompatNumCtxEnabled({
config: { models: { providers: {} } },
providerId: "ollama",
}),
).toBe(true);
});
// Only an explicit injectNumCtxForOpenAICompat: false opts a provider out.
it("returns false when provider flag is explicitly disabled", () => {
expect(
resolveOllamaCompatNumCtxEnabled({
config: {
models: {
providers: {
ollama: {
baseUrl: "http://127.0.0.1:11434/v1",
api: "openai-completions",
injectNumCtxForOpenAICompat: false,
models: [],
},
},
},
},
providerId: "ollama",
}),
).toBe(false);
});
});
describe("shouldInjectOllamaCompatNumCtx", () => {
// Injection is gated on the adapter: openai-responses (or anything other
// than openai-completions) must never get num_ctx, even for ollama.
it("requires openai-completions adapter", () => {
expect(
shouldInjectOllamaCompatNumCtx({
model: {
provider: "ollama",
api: "openai-responses",
baseUrl: "http://127.0.0.1:11434/v1",
},
}),
).toBe(false);
});
// Even when adapter and endpoint match, a provider-level opt-out wins.
it("respects provider flag disablement", () => {
expect(
shouldInjectOllamaCompatNumCtx({
model: {
provider: "ollama",
api: "openai-completions",
baseUrl: "http://127.0.0.1:11434/v1",
},
config: {
models: {
providers: {
ollama: {
baseUrl: "http://127.0.0.1:11434/v1",
api: "openai-completions",
injectNumCtxForOpenAICompat: false,
models: [],
},
},
},
},
providerId: "ollama",
}),
).toBe(false);
});
});

View File

@@ -40,7 +40,7 @@ import { resolveOpenClawDocsPath } from "../../docs-path.js";
import { isTimeoutError } from "../../failover-error.js";
import { resolveImageSanitizationLimits } from "../../image-sanitization.js";
import { resolveModelAuthMode } from "../../model-auth.js";
import { resolveDefaultModelForAgent } from "../../model-selection.js";
import { normalizeProviderId, resolveDefaultModelForAgent } from "../../model-selection.js";
import { createOllamaStreamFn, OLLAMA_NATIVE_BASE_URL } from "../../ollama-stream.js";
import { resolveOwnerDisplaySetting } from "../../owner-display.js";
import {
@@ -127,6 +127,104 @@ type PromptBuildHookRunner = {
) => Promise<PluginHookBeforeAgentStartResult | undefined>;
};
/**
 * Heuristically decides whether a model's provider/endpoint is Ollama
 * exposing its OpenAI-compatible API.
 *
 * True when: the normalized provider id is exactly "ollama"; or the base URL
 * points at a loopback host on Ollama's default port 11434; or the URL is a
 * remote host on port 11434 with an OpenAI-compat root path ("/" or "/v1")
 * AND the provider id itself hints at Ollama (e.g. "my-ollama").
 */
export function isOllamaCompatProvider(model: {
  provider?: string;
  baseUrl?: string;
  api?: string;
}): boolean {
  const providerId = normalizeProviderId(model.provider ?? "");
  if (providerId === "ollama") {
    return true;
  }
  const baseUrl = model.baseUrl;
  if (!baseUrl) {
    return false;
  }
  let url: URL;
  try {
    url = new URL(baseUrl);
  } catch {
    // An unparseable base URL can never be classified as Ollama.
    return false;
  }
  const onDefaultOllamaPort = url.port === "11434";
  const host = url.hostname.toLowerCase();
  // Both bracketed and bare IPv6 loopback are accepted defensively.
  const isLoopback =
    host === "localhost" || host === "127.0.0.1" || host === "::1" || host === "[::1]";
  if (isLoopback && onDefaultOllamaPort) {
    return true;
  }
  // Remote/LAN Ollama OpenAI-compatible endpoints count only when the
  // provider id itself indicates Ollama usage (e.g. "my-ollama").
  const providerHintsOllama = providerId.includes("ollama");
  const hasCompatPath = url.pathname === "/" || /^\/v1\/?$/i.test(url.pathname);
  return providerHintsOllama && onDefaultOllamaPort && hasCompatPath;
}
/**
 * Resolves the per-provider `injectNumCtxForOpenAICompat` flag.
 *
 * Injection is ON by default: a missing provider id, missing config, missing
 * provider entry, or unset flag all resolve to true. Only an explicit
 * `false` on the matching provider entry disables it. The lookup first tries
 * the exact provider id, then falls back to a normalized-id comparison.
 */
export function resolveOllamaCompatNumCtxEnabled(params: {
  config?: OpenClawConfig;
  providerId?: string;
}): boolean {
  const providerId = params.providerId?.trim();
  if (!providerId) {
    return true;
  }
  const providers = params.config?.models?.providers;
  if (!providers) {
    return true;
  }
  // Exact id match takes precedence over any normalization.
  const exact = providers[providerId];
  if (exact) {
    return exact.injectNumCtxForOpenAICompat ?? true;
  }
  // Fall back to normalized-id matching (e.g. alias/case differences).
  const wanted = normalizeProviderId(providerId);
  const match = Object.entries(providers).find(
    ([candidateId]) => normalizeProviderId(candidateId) === wanted,
  );
  return match?.[1].injectNumCtxForOpenAICompat ?? true;
}
/**
 * Decides whether the num_ctx injection wrapper should be applied for the
 * given model/provider combination.
 *
 * Three gates, in order: the model must use the OpenAI-compatible adapter
 * ("openai-completions"), the endpoint must look like Ollama, and the
 * provider-level flag must not be explicitly disabled.
 */
export function shouldInjectOllamaCompatNumCtx(params: {
  model: { api?: string; provider?: string; baseUrl?: string };
  config?: OpenClawConfig;
  providerId?: string;
}): boolean {
  const { model, config, providerId } = params;
  return (
    // num_ctx only travels on the OpenAI-compatible adapter path.
    model.api === "openai-completions" &&
    isOllamaCompatProvider(model) &&
    resolveOllamaCompatNumCtxEnabled({ config, providerId })
  );
}
/**
 * Decorates a stream function so every outgoing request payload carries
 * `options.num_ctx` (otherwise Ollama's OpenAI-compatible endpoint falls
 * back to its default 4096-token context window).
 *
 * Non-object payloads are forwarded untouched; object payloads get an
 * `options` object created when absent. The caller's own onPayload hook is
 * always invoked afterwards, exactly once per payload.
 */
export function wrapOllamaCompatNumCtx(baseFn: StreamFn | undefined, numCtx: number): StreamFn {
  const inner = baseFn ?? streamSimple;
  return (model, context, options) => {
    const injectNumCtx = (payload: unknown): void => {
      if (payload !== null && typeof payload === "object") {
        const record = payload as Record<string, unknown>;
        // Replace any missing/non-object options slot before writing num_ctx.
        if (record.options === null || typeof record.options !== "object") {
          record.options = {};
        }
        (record.options as Record<string, unknown>).num_ctx = numCtx;
      }
      // Always hand the payload on to the downstream hook, mutated or not.
      options?.onPayload?.(payload);
    };
    return inner(model, context, { ...options, onPayload: injectNumCtx });
  };
}
function trimWhitespaceFromToolCallNamesInMessage(message: unknown): void {
if (!message || typeof message !== "object") {
return;
@@ -773,6 +871,27 @@ export async function runEmbeddedAttempt(
activeSession.agent.streamFn = streamSimple;
}
// Ollama with OpenAI-compatible API needs num_ctx in payload.options.
// Otherwise Ollama defaults to a 4096 context window.
const providerIdForNumCtx =
typeof params.model.provider === "string" && params.model.provider.trim().length > 0
? params.model.provider
: params.provider;
const shouldInjectNumCtx = shouldInjectOllamaCompatNumCtx({
model: params.model,
config: params.config,
providerId: providerIdForNumCtx,
});
if (shouldInjectNumCtx) {
const numCtx = Math.max(
1,
Math.floor(
params.model.contextWindow ?? params.model.maxTokens ?? DEFAULT_CONTEXT_TOKENS,
),
);
activeSession.agent.streamFn = wrapOllamaCompatNumCtx(activeSession.agent.streamFn, numCtx);
}
applyExtraParamsToAgent(
activeSession.agent,
params.config,