mirror of https://github.com/openclaw/openclaw.git
synced 2026-05-11 00:14:34 +00:00
fix(ollama): unify context window handling across discovery, merge, and OpenAI-compat transport (#29205)
* fix(ollama): inject num_ctx for OpenAI-compatible transport
* fix(ollama): discover per-model context and preserve higher limits
* fix(agents): prefer matching provider model for fallback limits
* fix(types): require numeric token limits in provider model merge
* fix(types): accept unknown payload in ollama num_ctx wrapper
* fix(types): simplify ollama settled-result extraction
* config(models): add provider flag for Ollama OpenAI num_ctx injection
* config(schema): allow provider num_ctx injection flag
* config(labels): label provider num_ctx injection flag
* config(help): document provider num_ctx injection flag
* agents(ollama): gate OpenAI num_ctx injection with provider config
* tests(ollama): cover provider num_ctx injection flag behavior
* docs(config): list provider num_ctx injection option
* docs(ollama): document OpenAI num_ctx injection toggle
* docs(config): clarify merge token-limit precedence
* config(help): note merge uses higher model token limits
* fix(ollama): cap /api/show discovery concurrency
* fix(ollama): restrict num_ctx injection to OpenAI compat
* tests(ollama): cover ipv6 and compat num_ctx gating
* fix(ollama): detect remote compat endpoints for ollama-labeled providers
* fix(ollama): cap per-model /api/show lookups to bound discovery load
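To make the flag-gated injection in the bullets above concrete, here is a minimal TypeScript sketch of the described behavior. The names OllamaProviderConfig, openaiNumCtx, and injectNumCtx are illustrative assumptions rather than identifiers from this commit, and the sketch assumes the OpenAI-compatible endpoint honors an Ollama-style options object.

// Sketch only: gate num_ctx injection behind a provider flag and restrict it
// to the OpenAI-compatible transport, as the commit message describes.
interface OllamaProviderConfig {
  openaiNumCtx?: boolean; // hypothetical flag name; the real option may differ
}

function injectNumCtx(
  body: Record<string, unknown>,
  contextWindow: number | undefined,
  provider: OllamaProviderConfig,
  isOpenAiCompat: boolean,
): Record<string, unknown> {
  // Leave the request untouched unless every gate passes: OpenAI-compat
  // transport, the provider flag enabled, and a known context window.
  if (!isOpenAiCompat || !provider.openaiNumCtx || !contextWindow) {
    return body;
  }
  // Ollama reads runtime options (including num_ctx) from an "options" object.
  const options = {
    ...(body.options as Record<string, unknown> | undefined),
    num_ctx: contextWindow,
  };
  return { ...body, options };
}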
@@ -144,6 +144,8 @@ const QWEN_PORTAL_DEFAULT_COST = {
const OLLAMA_BASE_URL = OLLAMA_NATIVE_BASE_URL;
const OLLAMA_API_BASE_URL = OLLAMA_BASE_URL;
+const OLLAMA_SHOW_CONCURRENCY = 8;
+const OLLAMA_SHOW_MAX_MODELS = 200;
const OLLAMA_DEFAULT_CONTEXT_WINDOW = 128000;
const OLLAMA_DEFAULT_MAX_TOKENS = 8192;
const OLLAMA_DEFAULT_COST = {
@@ -236,6 +238,38 @@ export function resolveOllamaApiBase(configuredBaseUrl?: string): string {
  return trimmed.replace(/\/v1$/i, "");
}

+async function queryOllamaContextWindow(
+  apiBase: string,
+  modelName: string,
+): Promise<number | undefined> {
+  try {
+    const response = await fetch(`${apiBase}/api/show`, {
+      method: "POST",
+      headers: { "Content-Type": "application/json" },
+      body: JSON.stringify({ name: modelName }),
+      signal: AbortSignal.timeout(3000),
+    });
+    if (!response.ok) {
+      return undefined;
+    }
+    const data = (await response.json()) as { model_info?: Record<string, unknown> };
+    if (!data.model_info) {
+      return undefined;
+    }
+    for (const [key, value] of Object.entries(data.model_info)) {
+      if (key.endsWith(".context_length") && typeof value === "number" && Number.isFinite(value)) {
+        const contextWindow = Math.floor(value);
+        if (contextWindow > 0) {
+          return contextWindow;
+        }
+      }
+    }
+    return undefined;
+  } catch {
+    return undefined;
+  }
+}

async function discoverOllamaModels(
  baseUrl?: string,
  opts?: { quiet?: boolean },
@@ -260,20 +294,35 @@ async function discoverOllamaModels(
      log.debug("No Ollama models found on local instance");
      return [];
    }
-    return data.models.map((model) => {
-      const modelId = model.name;
-      const isReasoning =
-        modelId.toLowerCase().includes("r1") || modelId.toLowerCase().includes("reasoning");
-      return {
-        id: modelId,
-        name: modelId,
-        reasoning: isReasoning,
-        input: ["text"],
-        cost: OLLAMA_DEFAULT_COST,
-        contextWindow: OLLAMA_DEFAULT_CONTEXT_WINDOW,
-        maxTokens: OLLAMA_DEFAULT_MAX_TOKENS,
-      };
-    });
+    const modelsToInspect = data.models.slice(0, OLLAMA_SHOW_MAX_MODELS);
+    if (modelsToInspect.length < data.models.length && !opts?.quiet) {
+      log.warn(
+        `Capping Ollama /api/show inspection to ${OLLAMA_SHOW_MAX_MODELS} models (received ${data.models.length})`,
+      );
+    }
+    const discovered: ModelDefinitionConfig[] = [];
+    for (let index = 0; index < modelsToInspect.length; index += OLLAMA_SHOW_CONCURRENCY) {
+      const batch = modelsToInspect.slice(index, index + OLLAMA_SHOW_CONCURRENCY);
+      const batchDiscovered = await Promise.all(
+        batch.map(async (model) => {
+          const modelId = model.name;
+          const contextWindow = await queryOllamaContextWindow(apiBase, modelId);
+          const isReasoning =
+            modelId.toLowerCase().includes("r1") || modelId.toLowerCase().includes("reasoning");
+          return {
+            id: modelId,
+            name: modelId,
+            reasoning: isReasoning,
+            input: ["text"],
+            cost: OLLAMA_DEFAULT_COST,
+            contextWindow: contextWindow ?? OLLAMA_DEFAULT_CONTEXT_WINDOW,
+            maxTokens: OLLAMA_DEFAULT_MAX_TOKENS,
+          } satisfies ModelDefinitionConfig;
+        }),
+      );
+      discovered.push(...batchDiscovered);
+    }
+    return discovered;
  } catch (error) {
    if (!opts?.quiet) {
      log.warn(`Failed to discover Ollama models: ${String(error)}`);
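The merge-precedence bullets in the commit message ("preserve higher limits", "merge uses higher model token limits") imply a simple rule for overlapping configured and discovered models. A hypothetical sketch follows; mergeTokenLimits is an assumed helper name, not the commit's actual function, though the field names mirror ModelDefinitionConfig above.

// Illustrative only: keep the larger of each token limit, so discovery never
// silently lowers a user-configured context window, and configuration never
// hides a larger discovered one.
function mergeTokenLimits(
  configured: { contextWindow: number; maxTokens: number },
  discovered: { contextWindow: number; maxTokens: number },
): { contextWindow: number; maxTokens: number } {
  return {
    contextWindow: Math.max(configured.contextWindow, discovered.contextWindow),
    maxTokens: Math.max(configured.maxTokens, discovered.maxTokens),
  };
}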