fix(venice): harden discovery limits and tool support (#38306)

* Config: add supportsTools compat flag

* Agents: add model tool support helper

* Venice: sync discovery and fallback metadata

* Agents: skip tools for unsupported models

* Changelog: note Venice provider hardening

* Update CHANGELOG.md

* Venice: cap degraded discovery metadata

* Apply suggestion from @greptile-apps[bot]

Co-authored-by: greptile-apps[bot] <165735046+greptile-apps[bot]@users.noreply.github.com>

* Venice: tolerate partial discovery capabilities

* Venice: tolerate missing discovery specs

---------

Co-authored-by: greptile-apps[bot] <165735046+greptile-apps[bot]@users.noreply.github.com>
This commit is contained in:
Vincent Koc
2026-03-06 19:07:11 -05:00
committed by GitHub
parent 942c53e7f0
commit 5320ee7731
9 changed files with 556 additions and 86 deletions

View File

@@ -0,0 +1,16 @@
import { describe, expect, it } from "vitest";
import { supportsModelTools } from "./model-tool-support.js";
describe("supportsModelTools", () => {
it("defaults to true when the model has no compat override", () => {
expect(supportsModelTools({} as never)).toBe(true);
});
it("returns true when compat.supportsTools is true", () => {
expect(supportsModelTools({ compat: { supportsTools: true } } as never)).toBe(true);
});
it("returns false when compat.supportsTools is false", () => {
expect(supportsModelTools({ compat: { supportsTools: false } } as never)).toBe(false);
});
});

View File

@@ -0,0 +1,7 @@
/**
 * Whether a model may be offered tools.
 *
 * Tool support is opt-out: every model is treated as tool-capable unless its
 * `compat` override explicitly sets `supportsTools: false`. Any malformed
 * `compat` value (missing, null, or a non-object) falls back to the default.
 *
 * @param model Model definition; only its optional `compat` bag is inspected.
 * @returns false only for an explicit `compat.supportsTools === false`.
 */
export function supportsModelTools(model: { compat?: unknown }): boolean {
  const { compat } = model;
  if (compat === null || compat === undefined || typeof compat !== "object") {
    return true;
  }
  return (compat as { supportsTools?: boolean }).supportsTools !== false;
}

View File

@@ -38,6 +38,7 @@ import { formatUserTime, resolveUserTimeFormat, resolveUserTimezone } from "../d
import { DEFAULT_CONTEXT_TOKENS, DEFAULT_MODEL, DEFAULT_PROVIDER } from "../defaults.js";
import { resolveOpenClawDocsPath } from "../docs-path.js";
import { getApiKeyForModel, resolveModelAuthMode } from "../model-auth.js";
import { supportsModelTools } from "../model-tool-support.js";
import { ensureOpenClawModelsJson } from "../models-config.js";
import { resolveOwnerDisplaySetting } from "../owner-display.js";
import {
@@ -400,7 +401,10 @@ export async function compactEmbeddedPiSessionDirect(
modelContextWindowTokens: model.contextWindow,
modelAuthMode: resolveModelAuthMode(model.provider, params.config),
});
const tools = sanitizeToolsForGoogle({ tools: toolsRaw, provider });
const tools = sanitizeToolsForGoogle({
tools: supportsModelTools(model) ? toolsRaw : [],
provider,
});
const allowedToolNames = collectAllowedToolNames({ tools });
logToolSchemasForGoogle({ tools, provider });
const machineName = await getMachineDisplayName();

View File

@@ -49,6 +49,7 @@ import { isTimeoutError } from "../../failover-error.js";
import { resolveImageSanitizationLimits } from "../../image-sanitization.js";
import { resolveModelAuthMode } from "../../model-auth.js";
import { normalizeProviderId, resolveDefaultModelForAgent } from "../../model-selection.js";
import { supportsModelTools } from "../../model-tool-support.js";
import { createOllamaStreamFn, OLLAMA_NATIVE_BASE_URL } from "../../ollama-stream.js";
import { createOpenAIWebSocketStreamFn, releaseWsSession } from "../../openai-ws-stream.js";
import { resolveOwnerDisplaySetting } from "../../owner-display.js";
@@ -878,10 +879,15 @@ export async function runEmbeddedAttempt(
params.requireExplicitMessageTarget ?? isSubagentSessionKey(params.sessionKey),
disableMessageTool: params.disableMessageTool,
});
const tools = sanitizeToolsForGoogle({ tools: toolsRaw, provider: params.provider });
const toolsEnabled = supportsModelTools(params.model);
const tools = sanitizeToolsForGoogle({
tools: toolsEnabled ? toolsRaw : [],
provider: params.provider,
});
const clientTools = toolsEnabled ? params.clientTools : undefined;
const allowedToolNames = collectAllowedToolNames({
tools,
clientTools: params.clientTools,
clientTools,
});
logToolSchemasForGoogle({ tools, provider: params.provider });
@@ -1146,9 +1152,9 @@ export async function runEmbeddedAttempt(
cfg: params.config,
agentId: sessionAgentId,
});
const clientToolDefs = params.clientTools
const clientToolDefs = clientTools
? toClientToolDefinitions(
params.clientTools,
clientTools,
(toolName, toolParams) => {
clientToolCallDetected = { name: toolName, params: toolParams };
},

View File

@@ -42,6 +42,7 @@ function makeModelsResponse(id: string): Response {
name: id,
privacy: "private",
availableContextTokens: 131072,
maxCompletionTokens: 4096,
capabilities: {
supportsReasoning: false,
supportsVision: false,
@@ -94,6 +95,239 @@ describe("venice-models", () => {
expect(models.map((m) => m.id)).toContain("llama-3.3-70b");
});
it("uses API maxCompletionTokens for catalog models when present", async () => {
const fetchMock = vi.fn(
async () =>
new Response(
JSON.stringify({
data: [
{
id: "llama-3.3-70b",
model_spec: {
name: "llama-3.3-70b",
privacy: "private",
availableContextTokens: 131072,
maxCompletionTokens: 2048,
capabilities: {
supportsReasoning: false,
supportsVision: false,
supportsFunctionCalling: true,
},
},
},
],
}),
{
status: 200,
headers: { "Content-Type": "application/json" },
},
),
);
vi.stubGlobal("fetch", fetchMock as unknown as typeof fetch);
const models = await runWithDiscoveryEnabled(() => discoverVeniceModels());
const llama = models.find((m) => m.id === "llama-3.3-70b");
expect(llama?.maxTokens).toBe(2048);
});
it("retains catalog maxTokens when the API omits maxCompletionTokens", async () => {
const fetchMock = vi.fn(
async () =>
new Response(
JSON.stringify({
data: [
{
id: "qwen3-235b-a22b-instruct-2507",
model_spec: {
name: "qwen3-235b-a22b-instruct-2507",
privacy: "private",
availableContextTokens: 131072,
capabilities: {
supportsReasoning: false,
supportsVision: false,
supportsFunctionCalling: true,
},
},
},
],
}),
{
status: 200,
headers: { "Content-Type": "application/json" },
},
),
);
vi.stubGlobal("fetch", fetchMock as unknown as typeof fetch);
const models = await runWithDiscoveryEnabled(() => discoverVeniceModels());
const qwen = models.find((m) => m.id === "qwen3-235b-a22b-instruct-2507");
expect(qwen?.maxTokens).toBe(16384);
});
it("disables tools for catalog models that do not support function calling", () => {
const model = buildVeniceModelDefinition(
VENICE_MODEL_CATALOG.find((entry) => entry.id === "deepseek-v3.2")!,
);
expect(model.compat?.supportsTools).toBe(false);
});
it("uses a conservative bounded maxTokens value for new models", async () => {
const fetchMock = vi.fn(
async () =>
new Response(
JSON.stringify({
data: [
{
id: "new-model-2026",
model_spec: {
name: "new-model-2026",
privacy: "private",
availableContextTokens: 50_000,
maxCompletionTokens: 200_000,
capabilities: {
supportsReasoning: false,
supportsVision: false,
supportsFunctionCalling: false,
},
},
},
],
}),
{
status: 200,
headers: { "Content-Type": "application/json" },
},
),
);
vi.stubGlobal("fetch", fetchMock as unknown as typeof fetch);
const models = await runWithDiscoveryEnabled(() => discoverVeniceModels());
const newModel = models.find((m) => m.id === "new-model-2026");
expect(newModel?.maxTokens).toBe(50000);
expect(newModel?.maxTokens).toBeLessThanOrEqual(newModel?.contextWindow ?? Infinity);
expect(newModel?.compat?.supportsTools).toBe(false);
});
it("caps new-model maxTokens to the fallback context window when API context is missing", async () => {
const fetchMock = vi.fn(
async () =>
new Response(
JSON.stringify({
data: [
{
id: "new-model-without-context",
model_spec: {
name: "new-model-without-context",
privacy: "private",
maxCompletionTokens: 200_000,
capabilities: {
supportsReasoning: false,
supportsVision: false,
supportsFunctionCalling: true,
},
},
},
],
}),
{
status: 200,
headers: { "Content-Type": "application/json" },
},
),
);
vi.stubGlobal("fetch", fetchMock as unknown as typeof fetch);
const models = await runWithDiscoveryEnabled(() => discoverVeniceModels());
const newModel = models.find((m) => m.id === "new-model-without-context");
expect(newModel?.contextWindow).toBe(128000);
expect(newModel?.maxTokens).toBe(128000);
});
it("ignores missing capabilities on partial metadata instead of aborting discovery", async () => {
const fetchMock = vi.fn(
async () =>
new Response(
JSON.stringify({
data: [
{
id: "llama-3.3-70b",
model_spec: {
name: "llama-3.3-70b",
privacy: "private",
availableContextTokens: 131072,
maxCompletionTokens: 2048,
},
},
{
id: "new-model-partial",
model_spec: {
name: "new-model-partial",
privacy: "private",
maxCompletionTokens: 2048,
},
},
],
}),
{
status: 200,
headers: { "Content-Type": "application/json" },
},
),
);
vi.stubGlobal("fetch", fetchMock as unknown as typeof fetch);
const models = await runWithDiscoveryEnabled(() => discoverVeniceModels());
const knownModel = models.find((m) => m.id === "llama-3.3-70b");
const partialModel = models.find((m) => m.id === "new-model-partial");
expect(models).not.toHaveLength(VENICE_MODEL_CATALOG.length);
expect(knownModel?.maxTokens).toBe(2048);
expect(partialModel?.contextWindow).toBe(128000);
expect(partialModel?.maxTokens).toBe(2048);
expect(partialModel?.compat?.supportsTools).toBeUndefined();
});
it("keeps known models discoverable when a row omits model_spec", async () => {
const fetchMock = vi.fn(
async () =>
new Response(
JSON.stringify({
data: [
{
id: "llama-3.3-70b",
},
{
id: "new-model-valid",
model_spec: {
name: "new-model-valid",
privacy: "private",
availableContextTokens: 32_000,
maxCompletionTokens: 2_048,
capabilities: {
supportsReasoning: false,
supportsVision: false,
supportsFunctionCalling: true,
},
},
},
],
}),
{
status: 200,
headers: { "Content-Type": "application/json" },
},
),
);
vi.stubGlobal("fetch", fetchMock as unknown as typeof fetch);
const models = await runWithDiscoveryEnabled(() => discoverVeniceModels());
const knownModel = models.find((m) => m.id === "llama-3.3-70b");
const newModel = models.find((m) => m.id === "new-model-valid");
expect(models).not.toHaveLength(VENICE_MODEL_CATALOG.length);
expect(knownModel?.maxTokens).toBe(4096);
expect(newModel?.contextWindow).toBe(32000);
expect(newModel?.maxTokens).toBe(2048);
});
it("falls back to static catalog after retry budget is exhausted", async () => {
const fetchMock = vi.fn(async () => {
throw Object.assign(new TypeError("fetch failed"), {

View File

@@ -17,6 +17,9 @@ export const VENICE_DEFAULT_COST = {
cacheWrite: 0,
};
const VENICE_DEFAULT_CONTEXT_WINDOW = 128_000;
const VENICE_DEFAULT_MAX_TOKENS = 4096;
const VENICE_DISCOVERY_HARD_MAX_TOKENS = 131_072;
const VENICE_DISCOVERY_TIMEOUT_MS = 10_000;
const VENICE_DISCOVERY_RETRYABLE_HTTP_STATUS = new Set([408, 425, 429, 500, 502, 503, 504]);
const VENICE_DISCOVERY_RETRYABLE_NETWORK_CODES = new Set([
@@ -59,8 +62,8 @@ export const VENICE_MODEL_CATALOG = [
name: "Llama 3.3 70B",
reasoning: false,
input: ["text"],
contextWindow: 131072,
maxTokens: 8192,
contextWindow: 128000,
maxTokens: 4096,
privacy: "private",
},
{
@@ -68,8 +71,8 @@ export const VENICE_MODEL_CATALOG = [
name: "Llama 3.2 3B",
reasoning: false,
input: ["text"],
contextWindow: 131072,
maxTokens: 8192,
contextWindow: 128000,
maxTokens: 4096,
privacy: "private",
},
{
@@ -77,8 +80,9 @@ export const VENICE_MODEL_CATALOG = [
name: "Hermes 3 Llama 3.1 405B",
reasoning: false,
input: ["text"],
contextWindow: 131072,
maxTokens: 8192,
contextWindow: 128000,
maxTokens: 16384,
supportsTools: false,
privacy: "private",
},
@@ -88,8 +92,8 @@ export const VENICE_MODEL_CATALOG = [
name: "Qwen3 235B Thinking",
reasoning: true,
input: ["text"],
contextWindow: 131072,
maxTokens: 8192,
contextWindow: 128000,
maxTokens: 16384,
privacy: "private",
},
{
@@ -97,8 +101,8 @@ export const VENICE_MODEL_CATALOG = [
name: "Qwen3 235B Instruct",
reasoning: false,
input: ["text"],
contextWindow: 131072,
maxTokens: 8192,
contextWindow: 128000,
maxTokens: 16384,
privacy: "private",
},
{
@@ -106,8 +110,26 @@ export const VENICE_MODEL_CATALOG = [
name: "Qwen3 Coder 480B",
reasoning: false,
input: ["text"],
contextWindow: 262144,
maxTokens: 8192,
contextWindow: 256000,
maxTokens: 65536,
privacy: "private",
},
{
id: "qwen3-coder-480b-a35b-instruct-turbo",
name: "Qwen3 Coder 480B Turbo",
reasoning: false,
input: ["text"],
contextWindow: 256000,
maxTokens: 65536,
privacy: "private",
},
{
id: "qwen3-5-35b-a3b",
name: "Qwen3.5 35B A3B",
reasoning: true,
input: ["text", "image"],
contextWindow: 256000,
maxTokens: 65536,
privacy: "private",
},
{
@@ -115,8 +137,8 @@ export const VENICE_MODEL_CATALOG = [
name: "Qwen3 Next 80B",
reasoning: false,
input: ["text"],
contextWindow: 262144,
maxTokens: 8192,
contextWindow: 256000,
maxTokens: 16384,
privacy: "private",
},
{
@@ -124,8 +146,8 @@ export const VENICE_MODEL_CATALOG = [
name: "Qwen3 VL 235B (Vision)",
reasoning: false,
input: ["text", "image"],
contextWindow: 262144,
maxTokens: 8192,
contextWindow: 256000,
maxTokens: 16384,
privacy: "private",
},
{
@@ -133,8 +155,8 @@ export const VENICE_MODEL_CATALOG = [
name: "Venice Small (Qwen3 4B)",
reasoning: true,
input: ["text"],
contextWindow: 32768,
maxTokens: 8192,
contextWindow: 32000,
maxTokens: 4096,
privacy: "private",
},
@@ -144,8 +166,9 @@ export const VENICE_MODEL_CATALOG = [
name: "DeepSeek V3.2",
reasoning: true,
input: ["text"],
contextWindow: 163840,
maxTokens: 8192,
contextWindow: 160000,
maxTokens: 32768,
supportsTools: false,
privacy: "private",
},
@@ -155,8 +178,9 @@ export const VENICE_MODEL_CATALOG = [
name: "Venice Uncensored (Dolphin-Mistral)",
reasoning: false,
input: ["text"],
contextWindow: 32768,
maxTokens: 8192,
contextWindow: 32000,
maxTokens: 4096,
supportsTools: false,
privacy: "private",
},
{
@@ -164,8 +188,8 @@ export const VENICE_MODEL_CATALOG = [
name: "Venice Medium (Mistral)",
reasoning: false,
input: ["text", "image"],
contextWindow: 131072,
maxTokens: 8192,
contextWindow: 128000,
maxTokens: 4096,
privacy: "private",
},
@@ -175,8 +199,8 @@ export const VENICE_MODEL_CATALOG = [
name: "Google Gemma 3 27B Instruct",
reasoning: false,
input: ["text", "image"],
contextWindow: 202752,
maxTokens: 8192,
contextWindow: 198000,
maxTokens: 16384,
privacy: "private",
},
{
@@ -184,8 +208,35 @@ export const VENICE_MODEL_CATALOG = [
name: "OpenAI GPT OSS 120B",
reasoning: false,
input: ["text"],
contextWindow: 131072,
maxTokens: 8192,
contextWindow: 128000,
maxTokens: 16384,
privacy: "private",
},
{
id: "nvidia-nemotron-3-nano-30b-a3b",
name: "NVIDIA Nemotron 3 Nano 30B",
reasoning: false,
input: ["text"],
contextWindow: 128000,
maxTokens: 16384,
privacy: "private",
},
{
id: "olafangensan-glm-4.7-flash-heretic",
name: "GLM 4.7 Flash Heretic",
reasoning: true,
input: ["text"],
contextWindow: 128000,
maxTokens: 24000,
privacy: "private",
},
{
id: "zai-org-glm-4.6",
name: "GLM 4.6",
reasoning: false,
input: ["text"],
contextWindow: 198000,
maxTokens: 16384,
privacy: "private",
},
{
@@ -193,8 +244,62 @@ export const VENICE_MODEL_CATALOG = [
name: "GLM 4.7",
reasoning: true,
input: ["text"],
contextWindow: 202752,
maxTokens: 8192,
contextWindow: 198000,
maxTokens: 16384,
privacy: "private",
},
{
id: "zai-org-glm-4.7-flash",
name: "GLM 4.7 Flash",
reasoning: true,
input: ["text"],
contextWindow: 128000,
maxTokens: 16384,
privacy: "private",
},
{
id: "zai-org-glm-5",
name: "GLM 5",
reasoning: true,
input: ["text"],
contextWindow: 198000,
maxTokens: 32000,
privacy: "private",
},
{
id: "kimi-k2-5",
name: "Kimi K2.5",
reasoning: true,
input: ["text", "image"],
contextWindow: 256000,
maxTokens: 65536,
privacy: "private",
},
{
id: "kimi-k2-thinking",
name: "Kimi K2 Thinking",
reasoning: true,
input: ["text"],
contextWindow: 256000,
maxTokens: 65536,
privacy: "private",
},
{
id: "minimax-m21",
name: "MiniMax M2.1",
reasoning: true,
input: ["text"],
contextWindow: 198000,
maxTokens: 32768,
privacy: "private",
},
{
id: "minimax-m25",
name: "MiniMax M2.5",
reasoning: true,
input: ["text"],
contextWindow: 198000,
maxTokens: 32768,
privacy: "private",
},
@@ -205,21 +310,39 @@ export const VENICE_MODEL_CATALOG = [
// Anthropic (via Venice)
{
id: "claude-opus-45",
id: "claude-opus-4-5",
name: "Claude Opus 4.5 (via Venice)",
reasoning: true,
input: ["text", "image"],
contextWindow: 202752,
maxTokens: 8192,
contextWindow: 198000,
maxTokens: 32768,
privacy: "anonymized",
},
{
id: "claude-sonnet-45",
id: "claude-opus-4-6",
name: "Claude Opus 4.6 (via Venice)",
reasoning: true,
input: ["text", "image"],
contextWindow: 1000000,
maxTokens: 128000,
privacy: "anonymized",
},
{
id: "claude-sonnet-4-5",
name: "Claude Sonnet 4.5 (via Venice)",
reasoning: true,
input: ["text", "image"],
contextWindow: 202752,
maxTokens: 8192,
contextWindow: 198000,
maxTokens: 64000,
privacy: "anonymized",
},
{
id: "claude-sonnet-4-6",
name: "Claude Sonnet 4.6 (via Venice)",
reasoning: true,
input: ["text", "image"],
contextWindow: 1000000,
maxTokens: 64000,
privacy: "anonymized",
},
@@ -229,8 +352,8 @@ export const VENICE_MODEL_CATALOG = [
name: "GPT-5.2 (via Venice)",
reasoning: true,
input: ["text"],
contextWindow: 262144,
maxTokens: 8192,
contextWindow: 256000,
maxTokens: 65536,
privacy: "anonymized",
},
{
@@ -238,8 +361,44 @@ export const VENICE_MODEL_CATALOG = [
name: "GPT-5.2 Codex (via Venice)",
reasoning: true,
input: ["text", "image"],
contextWindow: 262144,
maxTokens: 8192,
contextWindow: 256000,
maxTokens: 65536,
privacy: "anonymized",
},
{
id: "openai-gpt-53-codex",
name: "GPT-5.3 Codex (via Venice)",
reasoning: true,
input: ["text", "image"],
contextWindow: 400000,
maxTokens: 128000,
privacy: "anonymized",
},
{
id: "openai-gpt-54",
name: "GPT-5.4 (via Venice)",
reasoning: true,
input: ["text", "image"],
contextWindow: 1000000,
maxTokens: 131072,
privacy: "anonymized",
},
{
id: "openai-gpt-4o-2024-11-20",
name: "GPT-4o (via Venice)",
reasoning: false,
input: ["text", "image"],
contextWindow: 128000,
maxTokens: 16384,
privacy: "anonymized",
},
{
id: "openai-gpt-4o-mini-2024-07-18",
name: "GPT-4o Mini (via Venice)",
reasoning: false,
input: ["text", "image"],
contextWindow: 128000,
maxTokens: 16384,
privacy: "anonymized",
},
@@ -249,8 +408,17 @@ export const VENICE_MODEL_CATALOG = [
name: "Gemini 3 Pro (via Venice)",
reasoning: true,
input: ["text", "image"],
contextWindow: 202752,
maxTokens: 8192,
contextWindow: 198000,
maxTokens: 32768,
privacy: "anonymized",
},
{
id: "gemini-3-1-pro-preview",
name: "Gemini 3.1 Pro (via Venice)",
reasoning: true,
input: ["text", "image"],
contextWindow: 1000000,
maxTokens: 32768,
privacy: "anonymized",
},
{
@@ -258,8 +426,8 @@ export const VENICE_MODEL_CATALOG = [
name: "Gemini 3 Flash (via Venice)",
reasoning: true,
input: ["text", "image"],
contextWindow: 262144,
maxTokens: 8192,
contextWindow: 256000,
maxTokens: 65536,
privacy: "anonymized",
},
@@ -269,8 +437,8 @@ export const VENICE_MODEL_CATALOG = [
name: "Grok 4.1 Fast (via Venice)",
reasoning: true,
input: ["text", "image"],
contextWindow: 262144,
maxTokens: 8192,
contextWindow: 1000000,
maxTokens: 30000,
privacy: "anonymized",
},
{
@@ -278,28 +446,8 @@ export const VENICE_MODEL_CATALOG = [
name: "Grok Code Fast 1 (via Venice)",
reasoning: true,
input: ["text"],
contextWindow: 262144,
maxTokens: 8192,
privacy: "anonymized",
},
// Other anonymized models
{
id: "kimi-k2-thinking",
name: "Kimi K2 Thinking (via Venice)",
reasoning: true,
input: ["text"],
contextWindow: 262144,
maxTokens: 8192,
privacy: "anonymized",
},
{
id: "minimax-m21",
name: "MiniMax M2.5 (via Venice)",
reasoning: true,
input: ["text"],
contextWindow: 202752,
maxTokens: 8192,
contextWindow: 256000,
maxTokens: 10000,
privacy: "anonymized",
},
] as const;
@@ -326,6 +474,7 @@ export function buildVeniceModelDefinition(entry: VeniceCatalogEntry): ModelDefi
// See: https://github.com/openclaw/openclaw/issues/15819
compat: {
supportsUsageInStreaming: false,
...("supportsTools" in entry && !entry.supportsTools ? { supportsTools: false } : {}),
},
};
}
@@ -334,17 +483,18 @@ export function buildVeniceModelDefinition(entry: VeniceCatalogEntry): ModelDefi
interface VeniceModelSpec {
name: string;
privacy: "private" | "anonymized";
availableContextTokens: number;
capabilities: {
supportsReasoning: boolean;
supportsVision: boolean;
supportsFunctionCalling: boolean;
availableContextTokens?: number;
maxCompletionTokens?: number;
capabilities?: {
supportsReasoning?: boolean;
supportsVision?: boolean;
supportsFunctionCalling?: boolean;
};
}
interface VeniceModel {
id: string;
model_spec: VeniceModelSpec;
model_spec?: VeniceModelSpec;
}
interface VeniceModelsResponse {
@@ -412,6 +562,36 @@ function isRetryableVeniceDiscoveryError(err: unknown): boolean {
return hasRetryableNetworkCode(err);
}
/**
 * Coerce an untrusted numeric field into a positive integer.
 *
 * Accepts only finite numbers; everything else (strings, NaN, Infinity,
 * undefined) yields undefined. The value is floored BEFORE the positivity
 * check so fractional inputs in (0, 1) correctly normalize to undefined
 * instead of leaking a 0 (the original checked `value <= 0` pre-floor, so
 * e.g. 0.5 floored to 0 — a non-positive result from a "positive int"
 * helper; callers only truthy-check the result, so this fix is compatible).
 *
 * @param value Raw value from the Venice API payload.
 * @returns The floored positive integer, or undefined when absent/invalid.
 */
function normalizePositiveInt(value: unknown): number | undefined {
  if (typeof value !== "number" || !Number.isFinite(value)) {
    return undefined;
  }
  const floored = Math.floor(value);
  return floored > 0 ? floored : undefined;
}
/**
 * Derive a bounded maxTokens value from the live /models payload.
 *
 * Returns undefined when the API does not report a usable
 * maxCompletionTokens, letting the caller keep its catalog/default value.
 * Otherwise the reported value is clamped by:
 *  - the API-reported context window when present, else the catalog value
 *    (knownMaxTokens) or the provider-wide default context window; and
 *  - a hard cap: the catalog maxTokens for known models, or the discovery
 *    hard limit for unknown ones — so a bad API response cannot inflate
 *    limits beyond what the catalog vouches for.
 *
 * @param params.apiModel       Row from the Venice /models response.
 * @param params.knownMaxTokens Catalog maxTokens when this is a known model.
 */
function resolveApiMaxCompletionTokens(params: {
  apiModel: VeniceModel;
  knownMaxTokens?: number;
}): number | undefined {
  const spec = params.apiModel.model_spec;

  const reported = normalizePositiveInt(spec?.maxCompletionTokens);
  if (!reported) {
    return undefined;
  }

  const apiContextWindow = normalizePositiveInt(spec?.availableContextTokens);
  const catalogCap =
    typeof params.knownMaxTokens === "number" && Number.isFinite(params.knownMaxTokens)
      ? Math.floor(params.knownMaxTokens)
      : undefined;

  const hardCap = catalogCap ?? VENICE_DISCOVERY_HARD_MAX_TOKENS;
  const contextCap = apiContextWindow ?? catalogCap ?? VENICE_DEFAULT_CONTEXT_WINDOW;

  return Math.min(reported, contextCap, hardCap);
}
/**
 * Read the live API's function-calling capability flag.
 *
 * @param apiModel Row from the Venice /models response.
 * @returns The boolean flag when the API reports one; undefined when the
 *   spec/capabilities are missing or the field is not a boolean, so the
 *   caller can distinguish "unreported" from an explicit value.
 */
function resolveApiSupportsTools(apiModel: VeniceModel): boolean | undefined {
  const flag = apiModel.model_spec?.capabilities?.supportsFunctionCalling;
  if (typeof flag === "boolean") {
    return flag;
  }
  return undefined;
}
/**
* Discover models from Venice API with fallback to static catalog.
* The /models endpoint is public and doesn't require authentication.
@@ -468,30 +648,50 @@ export async function discoverVeniceModels(): Promise<ModelDefinitionConfig[]> {
for (const apiModel of data.data) {
const catalogEntry = catalogById.get(apiModel.id);
const apiMaxTokens = resolveApiMaxCompletionTokens({
apiModel,
knownMaxTokens: catalogEntry?.maxTokens,
});
const apiSupportsTools = resolveApiSupportsTools(apiModel);
if (catalogEntry) {
// Use catalog metadata for known models
models.push(buildVeniceModelDefinition(catalogEntry));
const definition = buildVeniceModelDefinition(catalogEntry);
if (apiMaxTokens !== undefined) {
definition.maxTokens = apiMaxTokens;
}
// We only let live discovery disable tools. Re-enabling tool support still
// requires a catalog update so a transient/bad /models response cannot
// silently expand the tool execution surface for known models.
if (apiSupportsTools === false) {
definition.compat = {
...definition.compat,
supportsTools: false,
};
}
models.push(definition);
} else {
// Create definition for newly discovered models not in catalog
const apiSpec = apiModel.model_spec;
const isReasoning =
apiModel.model_spec.capabilities.supportsReasoning ||
apiSpec?.capabilities?.supportsReasoning ||
apiModel.id.toLowerCase().includes("thinking") ||
apiModel.id.toLowerCase().includes("reason") ||
apiModel.id.toLowerCase().includes("r1");
const hasVision = apiModel.model_spec.capabilities.supportsVision;
const hasVision = apiSpec?.capabilities?.supportsVision === true;
models.push({
id: apiModel.id,
name: apiModel.model_spec.name || apiModel.id,
name: apiSpec?.name || apiModel.id,
reasoning: isReasoning,
input: hasVision ? ["text", "image"] : ["text"],
cost: VENICE_DEFAULT_COST,
contextWindow: apiModel.model_spec.availableContextTokens || 128000,
maxTokens: 8192,
contextWindow:
normalizePositiveInt(apiSpec?.availableContextTokens) ?? VENICE_DEFAULT_CONTEXT_WINDOW,
maxTokens: apiMaxTokens ?? VENICE_DEFAULT_MAX_TOKENS,
// Avoid usage-only streaming chunks that can break OpenAI-compatible parsers.
compat: {
supportsUsageInStreaming: false,
...(apiSupportsTools === false ? { supportsTools: false } : {}),
},
});
}