fix(ollama): unify context window handling across discovery, merge, and OpenAI-compat transport (#29205)
* fix(ollama): inject num_ctx for OpenAI-compatible transport
* fix(ollama): discover per-model context and preserve higher limits
* fix(agents): prefer matching provider model for fallback limits
* fix(types): require numeric token limits in provider model merge
* fix(types): accept unknown payload in ollama num_ctx wrapper
* fix(types): simplify ollama settled-result extraction
* config(models): add provider flag for Ollama OpenAI num_ctx injection
* config(schema): allow provider num_ctx injection flag
* config(labels): label provider num_ctx injection flag
* config(help): document provider num_ctx injection flag
* agents(ollama): gate OpenAI num_ctx injection with provider config
* tests(ollama): cover provider num_ctx injection flag behavior
* docs(config): list provider num_ctx injection option
* docs(ollama): document OpenAI num_ctx injection toggle
* docs(config): clarify merge token-limit precedence
* config(help): note merge uses higher model token limits
* fix(ollama): cap /api/show discovery concurrency
* fix(ollama): restrict num_ctx injection to OpenAI compat
* tests(ollama): cover ipv6 and compat num_ctx gating
* fix(ollama): detect remote compat endpoints for ollama-labeled providers
* fix(ollama): cap per-model /api/show lookups to bound discovery load
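To opt a provider out of the injection, the new injectNumCtxForOpenAICompat flag can be set to false on the provider entry. A minimal sketch, not the authoritative schema: it mirrors the object shape used in the test fixtures in this diff, the import path is a guess, and the base URL and empty models list are illustrative. The flag defaults to true when omitted.

import type { OpenClawConfig } from "./config/config.js"; // path assumed

const cfg = {
  models: {
    providers: {
      ollama: {
        baseUrl: "http://127.0.0.1:11434/v1",
        api: "openai-completions",
        injectNumCtxForOpenAICompat: false, // new flag; defaults to true when omitted
        models: [],
      },
    },
  },
} as OpenClawConfig;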
@@ -171,6 +171,35 @@ describe("resolveModel", () => {
     expect(result.model?.id).toBe("missing-model");
   });
 
+  it("prefers matching configured model metadata for fallback token limits", () => {
+    const cfg = {
+      models: {
+        providers: {
+          custom: {
+            baseUrl: "http://localhost:9000",
+            models: [
+              {
+                ...makeModel("model-a"),
+                contextWindow: 4096,
+                maxTokens: 1024,
+              },
+              {
+                ...makeModel("model-b"),
+                contextWindow: 262144,
+                maxTokens: 32768,
+              },
+            ],
+          },
+        },
+      },
+    } as OpenClawConfig;
+
+    const result = resolveModel("custom", "model-b", "/tmp/agent", cfg);
+
+    expect(result.model?.contextWindow).toBe(262144);
+    expect(result.model?.maxTokens).toBe(32768);
+  });
+
   it("builds an openai-codex fallback for gpt-5.3-codex", () => {
     mockOpenAICodexTemplateModel();
 
@@ -96,6 +96,7 @@ export function resolveModel(
   }
   const providerCfg = providers[provider];
   if (providerCfg || modelId.startsWith("mock-")) {
+    const configuredModel = providerCfg?.models?.find((candidate) => candidate.id === modelId);
     const fallbackModel: Model<Api> = normalizeModelCompat({
       id: modelId,
       name: modelId,
@@ -105,8 +106,14 @@
       reasoning: false,
       input: ["text"],
       cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
-      contextWindow: providerCfg?.models?.[0]?.contextWindow ?? DEFAULT_CONTEXT_TOKENS,
-      maxTokens: providerCfg?.models?.[0]?.maxTokens ?? DEFAULT_CONTEXT_TOKENS,
+      contextWindow:
+        configuredModel?.contextWindow ??
+        providerCfg?.models?.[0]?.contextWindow ??
+        DEFAULT_CONTEXT_TOKENS,
+      maxTokens:
+        configuredModel?.maxTokens ??
+        providerCfg?.models?.[0]?.maxTokens ??
+        DEFAULT_CONTEXT_TOKENS,
     } as Model<Api>);
     return { model: fallbackModel, authStorage, modelRegistry };
   }
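The fallback now resolves limits by nullish coalescing: the configured entry whose id matches the requested model wins, then the provider's first configured model, then DEFAULT_CONTEXT_TOKENS. A short walkthrough with the fixture values from the test above:

// Walkthrough for resolveModel("custom", "model-b", ...) against the fixture:
// configuredModel -> { id: "model-b", contextWindow: 262144, maxTokens: 32768 }
// contextWindow   -> 262144 (matching entry wins)
// maxTokens       -> 32768
// Before this change, models[0] ("model-a") supplied 4096/1024 for every
// fallback, regardless of which model id was requested.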
@@ -1,9 +1,13 @@
|
||||
import { describe, expect, it, vi } from "vitest";
|
||||
import type { OpenClawConfig } from "../../../config/config.js";
|
||||
import {
|
||||
isOllamaCompatProvider,
|
||||
resolveAttemptFsWorkspaceOnly,
|
||||
resolveOllamaCompatNumCtxEnabled,
|
||||
resolvePromptBuildHookResult,
|
||||
resolvePromptModeForSession,
|
||||
shouldInjectOllamaCompatNumCtx,
|
||||
wrapOllamaCompatNumCtx,
|
||||
wrapStreamFnTrimToolCallNames,
|
||||
} from "./attempt.js";
|
||||
|
||||
@@ -174,3 +178,159 @@ describe("wrapStreamFnTrimToolCallNames", () => {
     expect(baseFn).toHaveBeenCalledTimes(1);
   });
 });
+
+describe("isOllamaCompatProvider", () => {
+  it("detects native ollama provider id", () => {
+    expect(
+      isOllamaCompatProvider({
+        provider: "ollama",
+        api: "openai-completions",
+        baseUrl: "https://example.com/v1",
+      }),
+    ).toBe(true);
+  });
+
+  it("detects localhost Ollama OpenAI-compatible endpoint", () => {
+    expect(
+      isOllamaCompatProvider({
+        provider: "custom",
+        api: "openai-completions",
+        baseUrl: "http://127.0.0.1:11434/v1",
+      }),
+    ).toBe(true);
+  });
+
+  it("does not misclassify non-local OpenAI-compatible providers", () => {
+    expect(
+      isOllamaCompatProvider({
+        provider: "custom",
+        api: "openai-completions",
+        baseUrl: "https://api.openrouter.ai/v1",
+      }),
+    ).toBe(false);
+  });
+
+  it("detects remote Ollama-compatible endpoint when provider id hints ollama", () => {
+    expect(
+      isOllamaCompatProvider({
+        provider: "my-ollama",
+        api: "openai-completions",
+        baseUrl: "http://ollama-host:11434/v1",
+      }),
+    ).toBe(true);
+  });
+
+  it("detects IPv6 loopback Ollama OpenAI-compatible endpoint", () => {
+    expect(
+      isOllamaCompatProvider({
+        provider: "custom",
+        api: "openai-completions",
+        baseUrl: "http://[::1]:11434/v1",
+      }),
+    ).toBe(true);
+  });
+
+  it("does not classify arbitrary remote hosts on 11434 without ollama provider hint", () => {
+    expect(
+      isOllamaCompatProvider({
+        provider: "custom",
+        api: "openai-completions",
+        baseUrl: "http://example.com:11434/v1",
+      }),
+    ).toBe(false);
+  });
+});
+
+describe("wrapOllamaCompatNumCtx", () => {
+  it("injects num_ctx and preserves downstream onPayload hooks", () => {
+    let payloadSeen: Record<string, unknown> | undefined;
+    const baseFn = vi.fn((_model, _context, options) => {
+      const payload: Record<string, unknown> = { options: { temperature: 0.1 } };
+      options?.onPayload?.(payload);
+      payloadSeen = payload;
+      return {} as never;
+    });
+    const downstream = vi.fn();
+
+    const wrapped = wrapOllamaCompatNumCtx(baseFn as never, 202752);
+    void wrapped({} as never, {} as never, { onPayload: downstream } as never);
+
+    expect(baseFn).toHaveBeenCalledTimes(1);
+    expect((payloadSeen?.options as Record<string, unknown> | undefined)?.num_ctx).toBe(202752);
+    expect(downstream).toHaveBeenCalledTimes(1);
+  });
+});
+
+describe("resolveOllamaCompatNumCtxEnabled", () => {
+  it("defaults to true when config is missing", () => {
+    expect(resolveOllamaCompatNumCtxEnabled({ providerId: "ollama" })).toBe(true);
+  });
+
+  it("defaults to true when provider config is missing", () => {
+    expect(
+      resolveOllamaCompatNumCtxEnabled({
+        config: { models: { providers: {} } },
+        providerId: "ollama",
+      }),
+    ).toBe(true);
+  });
+
+  it("returns false when provider flag is explicitly disabled", () => {
+    expect(
+      resolveOllamaCompatNumCtxEnabled({
+        config: {
+          models: {
+            providers: {
+              ollama: {
+                baseUrl: "http://127.0.0.1:11434/v1",
+                api: "openai-completions",
+                injectNumCtxForOpenAICompat: false,
+                models: [],
+              },
+            },
+          },
+        },
+        providerId: "ollama",
+      }),
+    ).toBe(false);
+  });
+});
+
+describe("shouldInjectOllamaCompatNumCtx", () => {
+  it("requires openai-completions adapter", () => {
+    expect(
+      shouldInjectOllamaCompatNumCtx({
+        model: {
+          provider: "ollama",
+          api: "openai-responses",
+          baseUrl: "http://127.0.0.1:11434/v1",
+        },
+      }),
+    ).toBe(false);
+  });
+
+  it("respects provider flag disablement", () => {
+    expect(
+      shouldInjectOllamaCompatNumCtx({
+        model: {
+          provider: "ollama",
+          api: "openai-completions",
+          baseUrl: "http://127.0.0.1:11434/v1",
+        },
+        config: {
+          models: {
+            providers: {
+              ollama: {
+                baseUrl: "http://127.0.0.1:11434/v1",
+                api: "openai-completions",
+                injectNumCtxForOpenAICompat: false,
+                models: [],
+              },
+            },
+          },
+        },
+        providerId: "ollama",
+      }),
+    ).toBe(false);
+  });
+});
@@ -40,7 +40,7 @@ import { resolveOpenClawDocsPath } from "../../docs-path.js";
 import { isTimeoutError } from "../../failover-error.js";
 import { resolveImageSanitizationLimits } from "../../image-sanitization.js";
 import { resolveModelAuthMode } from "../../model-auth.js";
-import { resolveDefaultModelForAgent } from "../../model-selection.js";
+import { normalizeProviderId, resolveDefaultModelForAgent } from "../../model-selection.js";
 import { createOllamaStreamFn, OLLAMA_NATIVE_BASE_URL } from "../../ollama-stream.js";
 import { resolveOwnerDisplaySetting } from "../../owner-display.js";
 import {
@@ -127,6 +127,104 @@ type PromptBuildHookRunner = {
   ) => Promise<PluginHookBeforeAgentStartResult | undefined>;
 };
 
+export function isOllamaCompatProvider(model: {
+  provider?: string;
+  baseUrl?: string;
+  api?: string;
+}): boolean {
+  const providerId = normalizeProviderId(model.provider ?? "");
+  if (providerId === "ollama") {
+    return true;
+  }
+  if (!model.baseUrl) {
+    return false;
+  }
+  try {
+    const parsed = new URL(model.baseUrl);
+    const hostname = parsed.hostname.toLowerCase();
+    const isLocalhost =
+      hostname === "localhost" ||
+      hostname === "127.0.0.1" ||
+      hostname === "::1" ||
+      hostname === "[::1]";
+    if (isLocalhost && parsed.port === "11434") {
+      return true;
+    }
+
+    // Allow remote/LAN Ollama OpenAI-compatible endpoints when the provider id
+    // itself indicates Ollama usage (e.g. "my-ollama").
+    const providerHintsOllama = providerId.includes("ollama");
+    const isOllamaPort = parsed.port === "11434";
+    const isOllamaCompatPath = parsed.pathname === "/" || /^\/v1\/?$/i.test(parsed.pathname);
+    return providerHintsOllama && isOllamaPort && isOllamaCompatPath;
+  } catch {
+    return false;
+  }
+}
+
+export function resolveOllamaCompatNumCtxEnabled(params: {
+  config?: OpenClawConfig;
+  providerId?: string;
+}): boolean {
+  const providerId = params.providerId?.trim();
+  if (!providerId) {
+    return true;
+  }
+  const providers = params.config?.models?.providers;
+  if (!providers) {
+    return true;
+  }
+  const direct = providers[providerId];
+  if (direct) {
+    return direct.injectNumCtxForOpenAICompat ?? true;
+  }
+  const normalized = normalizeProviderId(providerId);
+  for (const [candidateId, candidate] of Object.entries(providers)) {
+    if (normalizeProviderId(candidateId) === normalized) {
+      return candidate.injectNumCtxForOpenAICompat ?? true;
+    }
+  }
+  return true;
+}
+
+export function shouldInjectOllamaCompatNumCtx(params: {
+  model: { api?: string; provider?: string; baseUrl?: string };
+  config?: OpenClawConfig;
+  providerId?: string;
+}): boolean {
+  // Restrict to the OpenAI-compatible adapter path only.
+  if (params.model.api !== "openai-completions") {
+    return false;
+  }
+  if (!isOllamaCompatProvider(params.model)) {
+    return false;
+  }
+  return resolveOllamaCompatNumCtxEnabled({
+    config: params.config,
+    providerId: params.providerId,
+  });
+}
+
+export function wrapOllamaCompatNumCtx(baseFn: StreamFn | undefined, numCtx: number): StreamFn {
+  const streamFn = baseFn ?? streamSimple;
+  return (model, context, options) =>
+    streamFn(model, context, {
+      ...options,
+      onPayload: (payload: unknown) => {
+        if (!payload || typeof payload !== "object") {
+          options?.onPayload?.(payload);
+          return;
+        }
+        const payloadRecord = payload as Record<string, unknown>;
+        if (!payloadRecord.options || typeof payloadRecord.options !== "object") {
+          payloadRecord.options = {};
+        }
+        (payloadRecord.options as Record<string, unknown>).num_ctx = numCtx;
+        options?.onPayload?.(payload);
+      },
+    });
+}
+
 function trimWhitespaceFromToolCallNamesInMessage(message: unknown): void {
   if (!message || typeof message !== "object") {
     return;
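The wrapper mutates the payload in place before forwarding it, so downstream onPayload hooks observe the injected value. A before/after sketch, using the numbers from the unit test above:

// Before the wrapper's onPayload hook runs (payload built by the base stream fn):
//   { options: { temperature: 0.1 } }
// After injection with numCtx = 202752:
//   { options: { temperature: 0.1, num_ctx: 202752 } }
// A payload with no options object gets an empty one created first; a
// non-object payload is passed through to the downstream hook unchanged.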
@@ -773,6 +871,27 @@ export async function runEmbeddedAttempt(
     activeSession.agent.streamFn = streamSimple;
   }
 
+  // Ollama with OpenAI-compatible API needs num_ctx in payload.options.
+  // Otherwise Ollama defaults to a 4096 context window.
+  const providerIdForNumCtx =
+    typeof params.model.provider === "string" && params.model.provider.trim().length > 0
+      ? params.model.provider
+      : params.provider;
+  const shouldInjectNumCtx = shouldInjectOllamaCompatNumCtx({
+    model: params.model,
+    config: params.config,
+    providerId: providerIdForNumCtx,
+  });
+  if (shouldInjectNumCtx) {
+    const numCtx = Math.max(
+      1,
+      Math.floor(
+        params.model.contextWindow ?? params.model.maxTokens ?? DEFAULT_CONTEXT_TOKENS,
+      ),
+    );
+    activeSession.agent.streamFn = wrapOllamaCompatNumCtx(activeSession.agent.streamFn, numCtx);
+  }
+
   applyExtraParamsToAgent(
     activeSession.agent,
     params.config,
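For a model that passes the gate, the request body the OpenAI-compatible endpoint ultimately receives carries the extra Ollama options block alongside the usual chat-completions fields. A hedged sketch as a TypeScript literal — the model name and message are illustrative, and options.num_ctx is the only field this change injects:

const body = {
  model: "llama3.1", // illustrative model name
  messages: [{ role: "user", content: "..." }],
  // Injected by wrapOllamaCompatNumCtx; without it Ollama falls back to a
  // 4096-token context window (per the comment in runEmbeddedAttempt).
  options: { num_ctx: 262144 },
};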