mirror of
https://github.com/openclaw/openclaw.git
synced 2026-05-10 14:04:58 +00:00
fix(venice): harden discovery limits and tool support (#38306)
* Config: add supportsTools compat flag
* Agents: add model tool support helper
* Venice: sync discovery and fallback metadata
* Agents: skip tools for unsupported models
* Changelog: note Venice provider hardening
* Update CHANGELOG.md
* Venice: cap degraded discovery metadata
* Apply suggestion from @greptile-apps[bot]

Co-authored-by: greptile-apps[bot] <165735046+greptile-apps[bot]@users.noreply.github.com>

* Venice: tolerate partial discovery capabilities
* Venice: tolerate missing discovery specs

---------

Co-authored-by: greptile-apps[bot] <165735046+greptile-apps[bot]@users.noreply.github.com>
This commit is contained in:
16
src/agents/model-tool-support.test.ts
Normal file
16
src/agents/model-tool-support.test.ts
Normal file
@@ -0,0 +1,16 @@
|
||||
import { describe, expect, it } from "vitest";
|
||||
import { supportsModelTools } from "./model-tool-support.js";
|
||||
|
||||
describe("supportsModelTools", () => {
|
||||
it("defaults to true when the model has no compat override", () => {
|
||||
expect(supportsModelTools({} as never)).toBe(true);
|
||||
});
|
||||
|
||||
it("returns true when compat.supportsTools is true", () => {
|
||||
expect(supportsModelTools({ compat: { supportsTools: true } } as never)).toBe(true);
|
||||
});
|
||||
|
||||
it("returns false when compat.supportsTools is false", () => {
|
||||
expect(supportsModelTools({ compat: { supportsTools: false } } as never)).toBe(false);
|
||||
});
|
||||
});
|
||||
7
src/agents/model-tool-support.ts
Normal file
7
src/agents/model-tool-support.ts
Normal file
@@ -0,0 +1,7 @@
|
||||
export function supportsModelTools(model: { compat?: unknown }): boolean {
|
||||
const compat =
|
||||
model.compat && typeof model.compat === "object"
|
||||
? (model.compat as { supportsTools?: boolean })
|
||||
: undefined;
|
||||
return compat?.supportsTools !== false;
|
||||
}
|
||||
@@ -38,6 +38,7 @@ import { formatUserTime, resolveUserTimeFormat, resolveUserTimezone } from "../d
|
||||
import { DEFAULT_CONTEXT_TOKENS, DEFAULT_MODEL, DEFAULT_PROVIDER } from "../defaults.js";
|
||||
import { resolveOpenClawDocsPath } from "../docs-path.js";
|
||||
import { getApiKeyForModel, resolveModelAuthMode } from "../model-auth.js";
|
||||
import { supportsModelTools } from "../model-tool-support.js";
|
||||
import { ensureOpenClawModelsJson } from "../models-config.js";
|
||||
import { resolveOwnerDisplaySetting } from "../owner-display.js";
|
||||
import {
|
||||
@@ -400,7 +401,10 @@ export async function compactEmbeddedPiSessionDirect(
|
||||
modelContextWindowTokens: model.contextWindow,
|
||||
modelAuthMode: resolveModelAuthMode(model.provider, params.config),
|
||||
});
|
||||
const tools = sanitizeToolsForGoogle({ tools: toolsRaw, provider });
|
||||
const tools = sanitizeToolsForGoogle({
|
||||
tools: supportsModelTools(model) ? toolsRaw : [],
|
||||
provider,
|
||||
});
|
||||
const allowedToolNames = collectAllowedToolNames({ tools });
|
||||
logToolSchemasForGoogle({ tools, provider });
|
||||
const machineName = await getMachineDisplayName();
|
||||
|
||||
@@ -49,6 +49,7 @@ import { isTimeoutError } from "../../failover-error.js";
|
||||
import { resolveImageSanitizationLimits } from "../../image-sanitization.js";
|
||||
import { resolveModelAuthMode } from "../../model-auth.js";
|
||||
import { normalizeProviderId, resolveDefaultModelForAgent } from "../../model-selection.js";
|
||||
import { supportsModelTools } from "../../model-tool-support.js";
|
||||
import { createOllamaStreamFn, OLLAMA_NATIVE_BASE_URL } from "../../ollama-stream.js";
|
||||
import { createOpenAIWebSocketStreamFn, releaseWsSession } from "../../openai-ws-stream.js";
|
||||
import { resolveOwnerDisplaySetting } from "../../owner-display.js";
|
||||
@@ -878,10 +879,15 @@ export async function runEmbeddedAttempt(
|
||||
params.requireExplicitMessageTarget ?? isSubagentSessionKey(params.sessionKey),
|
||||
disableMessageTool: params.disableMessageTool,
|
||||
});
|
||||
const tools = sanitizeToolsForGoogle({ tools: toolsRaw, provider: params.provider });
|
||||
const toolsEnabled = supportsModelTools(params.model);
|
||||
const tools = sanitizeToolsForGoogle({
|
||||
tools: toolsEnabled ? toolsRaw : [],
|
||||
provider: params.provider,
|
||||
});
|
||||
const clientTools = toolsEnabled ? params.clientTools : undefined;
|
||||
const allowedToolNames = collectAllowedToolNames({
|
||||
tools,
|
||||
clientTools: params.clientTools,
|
||||
clientTools,
|
||||
});
|
||||
logToolSchemasForGoogle({ tools, provider: params.provider });
|
||||
|
||||
@@ -1146,9 +1152,9 @@ export async function runEmbeddedAttempt(
|
||||
cfg: params.config,
|
||||
agentId: sessionAgentId,
|
||||
});
|
||||
const clientToolDefs = params.clientTools
|
||||
const clientToolDefs = clientTools
|
||||
? toClientToolDefinitions(
|
||||
params.clientTools,
|
||||
clientTools,
|
||||
(toolName, toolParams) => {
|
||||
clientToolCallDetected = { name: toolName, params: toolParams };
|
||||
},
|
||||
|
||||
@@ -42,6 +42,7 @@ function makeModelsResponse(id: string): Response {
|
||||
name: id,
|
||||
privacy: "private",
|
||||
availableContextTokens: 131072,
|
||||
maxCompletionTokens: 4096,
|
||||
capabilities: {
|
||||
supportsReasoning: false,
|
||||
supportsVision: false,
|
||||
@@ -94,6 +95,239 @@ describe("venice-models", () => {
|
||||
expect(models.map((m) => m.id)).toContain("llama-3.3-70b");
|
||||
});
|
||||
|
||||
it("uses API maxCompletionTokens for catalog models when present", async () => {
|
||||
const fetchMock = vi.fn(
|
||||
async () =>
|
||||
new Response(
|
||||
JSON.stringify({
|
||||
data: [
|
||||
{
|
||||
id: "llama-3.3-70b",
|
||||
model_spec: {
|
||||
name: "llama-3.3-70b",
|
||||
privacy: "private",
|
||||
availableContextTokens: 131072,
|
||||
maxCompletionTokens: 2048,
|
||||
capabilities: {
|
||||
supportsReasoning: false,
|
||||
supportsVision: false,
|
||||
supportsFunctionCalling: true,
|
||||
},
|
||||
},
|
||||
},
|
||||
],
|
||||
}),
|
||||
{
|
||||
status: 200,
|
||||
headers: { "Content-Type": "application/json" },
|
||||
},
|
||||
),
|
||||
);
|
||||
vi.stubGlobal("fetch", fetchMock as unknown as typeof fetch);
|
||||
|
||||
const models = await runWithDiscoveryEnabled(() => discoverVeniceModels());
|
||||
const llama = models.find((m) => m.id === "llama-3.3-70b");
|
||||
expect(llama?.maxTokens).toBe(2048);
|
||||
});
|
||||
|
||||
it("retains catalog maxTokens when the API omits maxCompletionTokens", async () => {
|
||||
const fetchMock = vi.fn(
|
||||
async () =>
|
||||
new Response(
|
||||
JSON.stringify({
|
||||
data: [
|
||||
{
|
||||
id: "qwen3-235b-a22b-instruct-2507",
|
||||
model_spec: {
|
||||
name: "qwen3-235b-a22b-instruct-2507",
|
||||
privacy: "private",
|
||||
availableContextTokens: 131072,
|
||||
capabilities: {
|
||||
supportsReasoning: false,
|
||||
supportsVision: false,
|
||||
supportsFunctionCalling: true,
|
||||
},
|
||||
},
|
||||
},
|
||||
],
|
||||
}),
|
||||
{
|
||||
status: 200,
|
||||
headers: { "Content-Type": "application/json" },
|
||||
},
|
||||
),
|
||||
);
|
||||
vi.stubGlobal("fetch", fetchMock as unknown as typeof fetch);
|
||||
|
||||
const models = await runWithDiscoveryEnabled(() => discoverVeniceModels());
|
||||
const qwen = models.find((m) => m.id === "qwen3-235b-a22b-instruct-2507");
|
||||
expect(qwen?.maxTokens).toBe(16384);
|
||||
});
|
||||
|
||||
it("disables tools for catalog models that do not support function calling", () => {
|
||||
const model = buildVeniceModelDefinition(
|
||||
VENICE_MODEL_CATALOG.find((entry) => entry.id === "deepseek-v3.2")!,
|
||||
);
|
||||
expect(model.compat?.supportsTools).toBe(false);
|
||||
});
|
||||
|
||||
it("uses a conservative bounded maxTokens value for new models", async () => {
|
||||
const fetchMock = vi.fn(
|
||||
async () =>
|
||||
new Response(
|
||||
JSON.stringify({
|
||||
data: [
|
||||
{
|
||||
id: "new-model-2026",
|
||||
model_spec: {
|
||||
name: "new-model-2026",
|
||||
privacy: "private",
|
||||
availableContextTokens: 50_000,
|
||||
maxCompletionTokens: 200_000,
|
||||
capabilities: {
|
||||
supportsReasoning: false,
|
||||
supportsVision: false,
|
||||
supportsFunctionCalling: false,
|
||||
},
|
||||
},
|
||||
},
|
||||
],
|
||||
}),
|
||||
{
|
||||
status: 200,
|
||||
headers: { "Content-Type": "application/json" },
|
||||
},
|
||||
),
|
||||
);
|
||||
vi.stubGlobal("fetch", fetchMock as unknown as typeof fetch);
|
||||
|
||||
const models = await runWithDiscoveryEnabled(() => discoverVeniceModels());
|
||||
const newModel = models.find((m) => m.id === "new-model-2026");
|
||||
expect(newModel?.maxTokens).toBe(50000);
|
||||
expect(newModel?.maxTokens).toBeLessThanOrEqual(newModel?.contextWindow ?? Infinity);
|
||||
expect(newModel?.compat?.supportsTools).toBe(false);
|
||||
});
|
||||
|
||||
it("caps new-model maxTokens to the fallback context window when API context is missing", async () => {
|
||||
const fetchMock = vi.fn(
|
||||
async () =>
|
||||
new Response(
|
||||
JSON.stringify({
|
||||
data: [
|
||||
{
|
||||
id: "new-model-without-context",
|
||||
model_spec: {
|
||||
name: "new-model-without-context",
|
||||
privacy: "private",
|
||||
maxCompletionTokens: 200_000,
|
||||
capabilities: {
|
||||
supportsReasoning: false,
|
||||
supportsVision: false,
|
||||
supportsFunctionCalling: true,
|
||||
},
|
||||
},
|
||||
},
|
||||
],
|
||||
}),
|
||||
{
|
||||
status: 200,
|
||||
headers: { "Content-Type": "application/json" },
|
||||
},
|
||||
),
|
||||
);
|
||||
vi.stubGlobal("fetch", fetchMock as unknown as typeof fetch);
|
||||
|
||||
const models = await runWithDiscoveryEnabled(() => discoverVeniceModels());
|
||||
const newModel = models.find((m) => m.id === "new-model-without-context");
|
||||
expect(newModel?.contextWindow).toBe(128000);
|
||||
expect(newModel?.maxTokens).toBe(128000);
|
||||
});
|
||||
|
||||
it("ignores missing capabilities on partial metadata instead of aborting discovery", async () => {
|
||||
const fetchMock = vi.fn(
|
||||
async () =>
|
||||
new Response(
|
||||
JSON.stringify({
|
||||
data: [
|
||||
{
|
||||
id: "llama-3.3-70b",
|
||||
model_spec: {
|
||||
name: "llama-3.3-70b",
|
||||
privacy: "private",
|
||||
availableContextTokens: 131072,
|
||||
maxCompletionTokens: 2048,
|
||||
},
|
||||
},
|
||||
{
|
||||
id: "new-model-partial",
|
||||
model_spec: {
|
||||
name: "new-model-partial",
|
||||
privacy: "private",
|
||||
maxCompletionTokens: 2048,
|
||||
},
|
||||
},
|
||||
],
|
||||
}),
|
||||
{
|
||||
status: 200,
|
||||
headers: { "Content-Type": "application/json" },
|
||||
},
|
||||
),
|
||||
);
|
||||
vi.stubGlobal("fetch", fetchMock as unknown as typeof fetch);
|
||||
|
||||
const models = await runWithDiscoveryEnabled(() => discoverVeniceModels());
|
||||
const knownModel = models.find((m) => m.id === "llama-3.3-70b");
|
||||
const partialModel = models.find((m) => m.id === "new-model-partial");
|
||||
expect(models).not.toHaveLength(VENICE_MODEL_CATALOG.length);
|
||||
expect(knownModel?.maxTokens).toBe(2048);
|
||||
expect(partialModel?.contextWindow).toBe(128000);
|
||||
expect(partialModel?.maxTokens).toBe(2048);
|
||||
expect(partialModel?.compat?.supportsTools).toBeUndefined();
|
||||
});
|
||||
|
||||
it("keeps known models discoverable when a row omits model_spec", async () => {
|
||||
const fetchMock = vi.fn(
|
||||
async () =>
|
||||
new Response(
|
||||
JSON.stringify({
|
||||
data: [
|
||||
{
|
||||
id: "llama-3.3-70b",
|
||||
},
|
||||
{
|
||||
id: "new-model-valid",
|
||||
model_spec: {
|
||||
name: "new-model-valid",
|
||||
privacy: "private",
|
||||
availableContextTokens: 32_000,
|
||||
maxCompletionTokens: 2_048,
|
||||
capabilities: {
|
||||
supportsReasoning: false,
|
||||
supportsVision: false,
|
||||
supportsFunctionCalling: true,
|
||||
},
|
||||
},
|
||||
},
|
||||
],
|
||||
}),
|
||||
{
|
||||
status: 200,
|
||||
headers: { "Content-Type": "application/json" },
|
||||
},
|
||||
),
|
||||
);
|
||||
vi.stubGlobal("fetch", fetchMock as unknown as typeof fetch);
|
||||
|
||||
const models = await runWithDiscoveryEnabled(() => discoverVeniceModels());
|
||||
const knownModel = models.find((m) => m.id === "llama-3.3-70b");
|
||||
const newModel = models.find((m) => m.id === "new-model-valid");
|
||||
expect(models).not.toHaveLength(VENICE_MODEL_CATALOG.length);
|
||||
expect(knownModel?.maxTokens).toBe(4096);
|
||||
expect(newModel?.contextWindow).toBe(32000);
|
||||
expect(newModel?.maxTokens).toBe(2048);
|
||||
});
|
||||
|
||||
it("falls back to static catalog after retry budget is exhausted", async () => {
|
||||
const fetchMock = vi.fn(async () => {
|
||||
throw Object.assign(new TypeError("fetch failed"), {
|
||||
|
||||
@@ -17,6 +17,9 @@ export const VENICE_DEFAULT_COST = {
|
||||
cacheWrite: 0,
|
||||
};
|
||||
|
||||
const VENICE_DEFAULT_CONTEXT_WINDOW = 128_000;
|
||||
const VENICE_DEFAULT_MAX_TOKENS = 4096;
|
||||
const VENICE_DISCOVERY_HARD_MAX_TOKENS = 131_072;
|
||||
const VENICE_DISCOVERY_TIMEOUT_MS = 10_000;
|
||||
const VENICE_DISCOVERY_RETRYABLE_HTTP_STATUS = new Set([408, 425, 429, 500, 502, 503, 504]);
|
||||
const VENICE_DISCOVERY_RETRYABLE_NETWORK_CODES = new Set([
|
||||
@@ -59,8 +62,8 @@ export const VENICE_MODEL_CATALOG = [
|
||||
name: "Llama 3.3 70B",
|
||||
reasoning: false,
|
||||
input: ["text"],
|
||||
contextWindow: 131072,
|
||||
maxTokens: 8192,
|
||||
contextWindow: 128000,
|
||||
maxTokens: 4096,
|
||||
privacy: "private",
|
||||
},
|
||||
{
|
||||
@@ -68,8 +71,8 @@ export const VENICE_MODEL_CATALOG = [
|
||||
name: "Llama 3.2 3B",
|
||||
reasoning: false,
|
||||
input: ["text"],
|
||||
contextWindow: 131072,
|
||||
maxTokens: 8192,
|
||||
contextWindow: 128000,
|
||||
maxTokens: 4096,
|
||||
privacy: "private",
|
||||
},
|
||||
{
|
||||
@@ -77,8 +80,9 @@ export const VENICE_MODEL_CATALOG = [
|
||||
name: "Hermes 3 Llama 3.1 405B",
|
||||
reasoning: false,
|
||||
input: ["text"],
|
||||
contextWindow: 131072,
|
||||
maxTokens: 8192,
|
||||
contextWindow: 128000,
|
||||
maxTokens: 16384,
|
||||
supportsTools: false,
|
||||
privacy: "private",
|
||||
},
|
||||
|
||||
@@ -88,8 +92,8 @@ export const VENICE_MODEL_CATALOG = [
|
||||
name: "Qwen3 235B Thinking",
|
||||
reasoning: true,
|
||||
input: ["text"],
|
||||
contextWindow: 131072,
|
||||
maxTokens: 8192,
|
||||
contextWindow: 128000,
|
||||
maxTokens: 16384,
|
||||
privacy: "private",
|
||||
},
|
||||
{
|
||||
@@ -97,8 +101,8 @@ export const VENICE_MODEL_CATALOG = [
|
||||
name: "Qwen3 235B Instruct",
|
||||
reasoning: false,
|
||||
input: ["text"],
|
||||
contextWindow: 131072,
|
||||
maxTokens: 8192,
|
||||
contextWindow: 128000,
|
||||
maxTokens: 16384,
|
||||
privacy: "private",
|
||||
},
|
||||
{
|
||||
@@ -106,8 +110,26 @@ export const VENICE_MODEL_CATALOG = [
|
||||
name: "Qwen3 Coder 480B",
|
||||
reasoning: false,
|
||||
input: ["text"],
|
||||
contextWindow: 262144,
|
||||
maxTokens: 8192,
|
||||
contextWindow: 256000,
|
||||
maxTokens: 65536,
|
||||
privacy: "private",
|
||||
},
|
||||
{
|
||||
id: "qwen3-coder-480b-a35b-instruct-turbo",
|
||||
name: "Qwen3 Coder 480B Turbo",
|
||||
reasoning: false,
|
||||
input: ["text"],
|
||||
contextWindow: 256000,
|
||||
maxTokens: 65536,
|
||||
privacy: "private",
|
||||
},
|
||||
{
|
||||
id: "qwen3-5-35b-a3b",
|
||||
name: "Qwen3.5 35B A3B",
|
||||
reasoning: true,
|
||||
input: ["text", "image"],
|
||||
contextWindow: 256000,
|
||||
maxTokens: 65536,
|
||||
privacy: "private",
|
||||
},
|
||||
{
|
||||
@@ -115,8 +137,8 @@ export const VENICE_MODEL_CATALOG = [
|
||||
name: "Qwen3 Next 80B",
|
||||
reasoning: false,
|
||||
input: ["text"],
|
||||
contextWindow: 262144,
|
||||
maxTokens: 8192,
|
||||
contextWindow: 256000,
|
||||
maxTokens: 16384,
|
||||
privacy: "private",
|
||||
},
|
||||
{
|
||||
@@ -124,8 +146,8 @@ export const VENICE_MODEL_CATALOG = [
|
||||
name: "Qwen3 VL 235B (Vision)",
|
||||
reasoning: false,
|
||||
input: ["text", "image"],
|
||||
contextWindow: 262144,
|
||||
maxTokens: 8192,
|
||||
contextWindow: 256000,
|
||||
maxTokens: 16384,
|
||||
privacy: "private",
|
||||
},
|
||||
{
|
||||
@@ -133,8 +155,8 @@ export const VENICE_MODEL_CATALOG = [
|
||||
name: "Venice Small (Qwen3 4B)",
|
||||
reasoning: true,
|
||||
input: ["text"],
|
||||
contextWindow: 32768,
|
||||
maxTokens: 8192,
|
||||
contextWindow: 32000,
|
||||
maxTokens: 4096,
|
||||
privacy: "private",
|
||||
},
|
||||
|
||||
@@ -144,8 +166,9 @@ export const VENICE_MODEL_CATALOG = [
|
||||
name: "DeepSeek V3.2",
|
||||
reasoning: true,
|
||||
input: ["text"],
|
||||
contextWindow: 163840,
|
||||
maxTokens: 8192,
|
||||
contextWindow: 160000,
|
||||
maxTokens: 32768,
|
||||
supportsTools: false,
|
||||
privacy: "private",
|
||||
},
|
||||
|
||||
@@ -155,8 +178,9 @@ export const VENICE_MODEL_CATALOG = [
|
||||
name: "Venice Uncensored (Dolphin-Mistral)",
|
||||
reasoning: false,
|
||||
input: ["text"],
|
||||
contextWindow: 32768,
|
||||
maxTokens: 8192,
|
||||
contextWindow: 32000,
|
||||
maxTokens: 4096,
|
||||
supportsTools: false,
|
||||
privacy: "private",
|
||||
},
|
||||
{
|
||||
@@ -164,8 +188,8 @@ export const VENICE_MODEL_CATALOG = [
|
||||
name: "Venice Medium (Mistral)",
|
||||
reasoning: false,
|
||||
input: ["text", "image"],
|
||||
contextWindow: 131072,
|
||||
maxTokens: 8192,
|
||||
contextWindow: 128000,
|
||||
maxTokens: 4096,
|
||||
privacy: "private",
|
||||
},
|
||||
|
||||
@@ -175,8 +199,8 @@ export const VENICE_MODEL_CATALOG = [
|
||||
name: "Google Gemma 3 27B Instruct",
|
||||
reasoning: false,
|
||||
input: ["text", "image"],
|
||||
contextWindow: 202752,
|
||||
maxTokens: 8192,
|
||||
contextWindow: 198000,
|
||||
maxTokens: 16384,
|
||||
privacy: "private",
|
||||
},
|
||||
{
|
||||
@@ -184,8 +208,35 @@ export const VENICE_MODEL_CATALOG = [
|
||||
name: "OpenAI GPT OSS 120B",
|
||||
reasoning: false,
|
||||
input: ["text"],
|
||||
contextWindow: 131072,
|
||||
maxTokens: 8192,
|
||||
contextWindow: 128000,
|
||||
maxTokens: 16384,
|
||||
privacy: "private",
|
||||
},
|
||||
{
|
||||
id: "nvidia-nemotron-3-nano-30b-a3b",
|
||||
name: "NVIDIA Nemotron 3 Nano 30B",
|
||||
reasoning: false,
|
||||
input: ["text"],
|
||||
contextWindow: 128000,
|
||||
maxTokens: 16384,
|
||||
privacy: "private",
|
||||
},
|
||||
{
|
||||
id: "olafangensan-glm-4.7-flash-heretic",
|
||||
name: "GLM 4.7 Flash Heretic",
|
||||
reasoning: true,
|
||||
input: ["text"],
|
||||
contextWindow: 128000,
|
||||
maxTokens: 24000,
|
||||
privacy: "private",
|
||||
},
|
||||
{
|
||||
id: "zai-org-glm-4.6",
|
||||
name: "GLM 4.6",
|
||||
reasoning: false,
|
||||
input: ["text"],
|
||||
contextWindow: 198000,
|
||||
maxTokens: 16384,
|
||||
privacy: "private",
|
||||
},
|
||||
{
|
||||
@@ -193,8 +244,62 @@ export const VENICE_MODEL_CATALOG = [
|
||||
name: "GLM 4.7",
|
||||
reasoning: true,
|
||||
input: ["text"],
|
||||
contextWindow: 202752,
|
||||
maxTokens: 8192,
|
||||
contextWindow: 198000,
|
||||
maxTokens: 16384,
|
||||
privacy: "private",
|
||||
},
|
||||
{
|
||||
id: "zai-org-glm-4.7-flash",
|
||||
name: "GLM 4.7 Flash",
|
||||
reasoning: true,
|
||||
input: ["text"],
|
||||
contextWindow: 128000,
|
||||
maxTokens: 16384,
|
||||
privacy: "private",
|
||||
},
|
||||
{
|
||||
id: "zai-org-glm-5",
|
||||
name: "GLM 5",
|
||||
reasoning: true,
|
||||
input: ["text"],
|
||||
contextWindow: 198000,
|
||||
maxTokens: 32000,
|
||||
privacy: "private",
|
||||
},
|
||||
{
|
||||
id: "kimi-k2-5",
|
||||
name: "Kimi K2.5",
|
||||
reasoning: true,
|
||||
input: ["text", "image"],
|
||||
contextWindow: 256000,
|
||||
maxTokens: 65536,
|
||||
privacy: "private",
|
||||
},
|
||||
{
|
||||
id: "kimi-k2-thinking",
|
||||
name: "Kimi K2 Thinking",
|
||||
reasoning: true,
|
||||
input: ["text"],
|
||||
contextWindow: 256000,
|
||||
maxTokens: 65536,
|
||||
privacy: "private",
|
||||
},
|
||||
{
|
||||
id: "minimax-m21",
|
||||
name: "MiniMax M2.1",
|
||||
reasoning: true,
|
||||
input: ["text"],
|
||||
contextWindow: 198000,
|
||||
maxTokens: 32768,
|
||||
privacy: "private",
|
||||
},
|
||||
{
|
||||
id: "minimax-m25",
|
||||
name: "MiniMax M2.5",
|
||||
reasoning: true,
|
||||
input: ["text"],
|
||||
contextWindow: 198000,
|
||||
maxTokens: 32768,
|
||||
privacy: "private",
|
||||
},
|
||||
|
||||
@@ -205,21 +310,39 @@ export const VENICE_MODEL_CATALOG = [
|
||||
|
||||
// Anthropic (via Venice)
|
||||
{
|
||||
id: "claude-opus-45",
|
||||
id: "claude-opus-4-5",
|
||||
name: "Claude Opus 4.5 (via Venice)",
|
||||
reasoning: true,
|
||||
input: ["text", "image"],
|
||||
contextWindow: 202752,
|
||||
maxTokens: 8192,
|
||||
contextWindow: 198000,
|
||||
maxTokens: 32768,
|
||||
privacy: "anonymized",
|
||||
},
|
||||
{
|
||||
id: "claude-sonnet-45",
|
||||
id: "claude-opus-4-6",
|
||||
name: "Claude Opus 4.6 (via Venice)",
|
||||
reasoning: true,
|
||||
input: ["text", "image"],
|
||||
contextWindow: 1000000,
|
||||
maxTokens: 128000,
|
||||
privacy: "anonymized",
|
||||
},
|
||||
{
|
||||
id: "claude-sonnet-4-5",
|
||||
name: "Claude Sonnet 4.5 (via Venice)",
|
||||
reasoning: true,
|
||||
input: ["text", "image"],
|
||||
contextWindow: 202752,
|
||||
maxTokens: 8192,
|
||||
contextWindow: 198000,
|
||||
maxTokens: 64000,
|
||||
privacy: "anonymized",
|
||||
},
|
||||
{
|
||||
id: "claude-sonnet-4-6",
|
||||
name: "Claude Sonnet 4.6 (via Venice)",
|
||||
reasoning: true,
|
||||
input: ["text", "image"],
|
||||
contextWindow: 1000000,
|
||||
maxTokens: 64000,
|
||||
privacy: "anonymized",
|
||||
},
|
||||
|
||||
@@ -229,8 +352,8 @@ export const VENICE_MODEL_CATALOG = [
|
||||
name: "GPT-5.2 (via Venice)",
|
||||
reasoning: true,
|
||||
input: ["text"],
|
||||
contextWindow: 262144,
|
||||
maxTokens: 8192,
|
||||
contextWindow: 256000,
|
||||
maxTokens: 65536,
|
||||
privacy: "anonymized",
|
||||
},
|
||||
{
|
||||
@@ -238,8 +361,44 @@ export const VENICE_MODEL_CATALOG = [
|
||||
name: "GPT-5.2 Codex (via Venice)",
|
||||
reasoning: true,
|
||||
input: ["text", "image"],
|
||||
contextWindow: 262144,
|
||||
maxTokens: 8192,
|
||||
contextWindow: 256000,
|
||||
maxTokens: 65536,
|
||||
privacy: "anonymized",
|
||||
},
|
||||
{
|
||||
id: "openai-gpt-53-codex",
|
||||
name: "GPT-5.3 Codex (via Venice)",
|
||||
reasoning: true,
|
||||
input: ["text", "image"],
|
||||
contextWindow: 400000,
|
||||
maxTokens: 128000,
|
||||
privacy: "anonymized",
|
||||
},
|
||||
{
|
||||
id: "openai-gpt-54",
|
||||
name: "GPT-5.4 (via Venice)",
|
||||
reasoning: true,
|
||||
input: ["text", "image"],
|
||||
contextWindow: 1000000,
|
||||
maxTokens: 131072,
|
||||
privacy: "anonymized",
|
||||
},
|
||||
{
|
||||
id: "openai-gpt-4o-2024-11-20",
|
||||
name: "GPT-4o (via Venice)",
|
||||
reasoning: false,
|
||||
input: ["text", "image"],
|
||||
contextWindow: 128000,
|
||||
maxTokens: 16384,
|
||||
privacy: "anonymized",
|
||||
},
|
||||
{
|
||||
id: "openai-gpt-4o-mini-2024-07-18",
|
||||
name: "GPT-4o Mini (via Venice)",
|
||||
reasoning: false,
|
||||
input: ["text", "image"],
|
||||
contextWindow: 128000,
|
||||
maxTokens: 16384,
|
||||
privacy: "anonymized",
|
||||
},
|
||||
|
||||
@@ -249,8 +408,17 @@ export const VENICE_MODEL_CATALOG = [
|
||||
name: "Gemini 3 Pro (via Venice)",
|
||||
reasoning: true,
|
||||
input: ["text", "image"],
|
||||
contextWindow: 202752,
|
||||
maxTokens: 8192,
|
||||
contextWindow: 198000,
|
||||
maxTokens: 32768,
|
||||
privacy: "anonymized",
|
||||
},
|
||||
{
|
||||
id: "gemini-3-1-pro-preview",
|
||||
name: "Gemini 3.1 Pro (via Venice)",
|
||||
reasoning: true,
|
||||
input: ["text", "image"],
|
||||
contextWindow: 1000000,
|
||||
maxTokens: 32768,
|
||||
privacy: "anonymized",
|
||||
},
|
||||
{
|
||||
@@ -258,8 +426,8 @@ export const VENICE_MODEL_CATALOG = [
|
||||
name: "Gemini 3 Flash (via Venice)",
|
||||
reasoning: true,
|
||||
input: ["text", "image"],
|
||||
contextWindow: 262144,
|
||||
maxTokens: 8192,
|
||||
contextWindow: 256000,
|
||||
maxTokens: 65536,
|
||||
privacy: "anonymized",
|
||||
},
|
||||
|
||||
@@ -269,8 +437,8 @@ export const VENICE_MODEL_CATALOG = [
|
||||
name: "Grok 4.1 Fast (via Venice)",
|
||||
reasoning: true,
|
||||
input: ["text", "image"],
|
||||
contextWindow: 262144,
|
||||
maxTokens: 8192,
|
||||
contextWindow: 1000000,
|
||||
maxTokens: 30000,
|
||||
privacy: "anonymized",
|
||||
},
|
||||
{
|
||||
@@ -278,28 +446,8 @@ export const VENICE_MODEL_CATALOG = [
|
||||
name: "Grok Code Fast 1 (via Venice)",
|
||||
reasoning: true,
|
||||
input: ["text"],
|
||||
contextWindow: 262144,
|
||||
maxTokens: 8192,
|
||||
privacy: "anonymized",
|
||||
},
|
||||
|
||||
// Other anonymized models
|
||||
{
|
||||
id: "kimi-k2-thinking",
|
||||
name: "Kimi K2 Thinking (via Venice)",
|
||||
reasoning: true,
|
||||
input: ["text"],
|
||||
contextWindow: 262144,
|
||||
maxTokens: 8192,
|
||||
privacy: "anonymized",
|
||||
},
|
||||
{
|
||||
id: "minimax-m21",
|
||||
name: "MiniMax M2.5 (via Venice)",
|
||||
reasoning: true,
|
||||
input: ["text"],
|
||||
contextWindow: 202752,
|
||||
maxTokens: 8192,
|
||||
contextWindow: 256000,
|
||||
maxTokens: 10000,
|
||||
privacy: "anonymized",
|
||||
},
|
||||
] as const;
|
||||
@@ -326,6 +474,7 @@ export function buildVeniceModelDefinition(entry: VeniceCatalogEntry): ModelDefi
|
||||
// See: https://github.com/openclaw/openclaw/issues/15819
|
||||
compat: {
|
||||
supportsUsageInStreaming: false,
|
||||
...("supportsTools" in entry && !entry.supportsTools ? { supportsTools: false } : {}),
|
||||
},
|
||||
};
|
||||
}
|
||||
@@ -334,17 +483,18 @@ export function buildVeniceModelDefinition(entry: VeniceCatalogEntry): ModelDefi
|
||||
interface VeniceModelSpec {
|
||||
name: string;
|
||||
privacy: "private" | "anonymized";
|
||||
availableContextTokens: number;
|
||||
capabilities: {
|
||||
supportsReasoning: boolean;
|
||||
supportsVision: boolean;
|
||||
supportsFunctionCalling: boolean;
|
||||
availableContextTokens?: number;
|
||||
maxCompletionTokens?: number;
|
||||
capabilities?: {
|
||||
supportsReasoning?: boolean;
|
||||
supportsVision?: boolean;
|
||||
supportsFunctionCalling?: boolean;
|
||||
};
|
||||
}
|
||||
|
||||
interface VeniceModel {
|
||||
id: string;
|
||||
model_spec: VeniceModelSpec;
|
||||
model_spec?: VeniceModelSpec;
|
||||
}
|
||||
|
||||
interface VeniceModelsResponse {
|
||||
@@ -412,6 +562,36 @@ function isRetryableVeniceDiscoveryError(err: unknown): boolean {
|
||||
return hasRetryableNetworkCode(err);
|
||||
}
|
||||
|
||||
function normalizePositiveInt(value: unknown): number | undefined {
|
||||
if (typeof value !== "number" || !Number.isFinite(value) || value <= 0) {
|
||||
return undefined;
|
||||
}
|
||||
return Math.floor(value);
|
||||
}
|
||||
|
||||
function resolveApiMaxCompletionTokens(params: {
|
||||
apiModel: VeniceModel;
|
||||
knownMaxTokens?: number;
|
||||
}): number | undefined {
|
||||
const raw = normalizePositiveInt(params.apiModel.model_spec?.maxCompletionTokens);
|
||||
if (!raw) {
|
||||
return undefined;
|
||||
}
|
||||
const contextWindow = normalizePositiveInt(params.apiModel.model_spec?.availableContextTokens);
|
||||
const knownMaxTokens =
|
||||
typeof params.knownMaxTokens === "number" && Number.isFinite(params.knownMaxTokens)
|
||||
? Math.floor(params.knownMaxTokens)
|
||||
: undefined;
|
||||
const hardCap = knownMaxTokens ?? VENICE_DISCOVERY_HARD_MAX_TOKENS;
|
||||
const fallbackContextWindow = knownMaxTokens ?? VENICE_DEFAULT_CONTEXT_WINDOW;
|
||||
return Math.min(raw, contextWindow ?? fallbackContextWindow, hardCap);
|
||||
}
|
||||
|
||||
function resolveApiSupportsTools(apiModel: VeniceModel): boolean | undefined {
|
||||
const supportsFunctionCalling = apiModel.model_spec?.capabilities?.supportsFunctionCalling;
|
||||
return typeof supportsFunctionCalling === "boolean" ? supportsFunctionCalling : undefined;
|
||||
}
|
||||
|
||||
/**
|
||||
* Discover models from Venice API with fallback to static catalog.
|
||||
* The /models endpoint is public and doesn't require authentication.
|
||||
@@ -468,30 +648,50 @@ export async function discoverVeniceModels(): Promise<ModelDefinitionConfig[]> {
|
||||
|
||||
for (const apiModel of data.data) {
|
||||
const catalogEntry = catalogById.get(apiModel.id);
|
||||
const apiMaxTokens = resolveApiMaxCompletionTokens({
|
||||
apiModel,
|
||||
knownMaxTokens: catalogEntry?.maxTokens,
|
||||
});
|
||||
const apiSupportsTools = resolveApiSupportsTools(apiModel);
|
||||
if (catalogEntry) {
|
||||
// Use catalog metadata for known models
|
||||
models.push(buildVeniceModelDefinition(catalogEntry));
|
||||
const definition = buildVeniceModelDefinition(catalogEntry);
|
||||
if (apiMaxTokens !== undefined) {
|
||||
definition.maxTokens = apiMaxTokens;
|
||||
}
|
||||
// We only let live discovery disable tools. Re-enabling tool support still
|
||||
// requires a catalog update so a transient/bad /models response cannot
|
||||
// silently expand the tool execution surface for known models.
|
||||
if (apiSupportsTools === false) {
|
||||
definition.compat = {
|
||||
...definition.compat,
|
||||
supportsTools: false,
|
||||
};
|
||||
}
|
||||
models.push(definition);
|
||||
} else {
|
||||
// Create definition for newly discovered models not in catalog
|
||||
const apiSpec = apiModel.model_spec;
|
||||
const isReasoning =
|
||||
apiModel.model_spec.capabilities.supportsReasoning ||
|
||||
apiSpec?.capabilities?.supportsReasoning ||
|
||||
apiModel.id.toLowerCase().includes("thinking") ||
|
||||
apiModel.id.toLowerCase().includes("reason") ||
|
||||
apiModel.id.toLowerCase().includes("r1");
|
||||
|
||||
const hasVision = apiModel.model_spec.capabilities.supportsVision;
|
||||
const hasVision = apiSpec?.capabilities?.supportsVision === true;
|
||||
|
||||
models.push({
|
||||
id: apiModel.id,
|
||||
name: apiModel.model_spec.name || apiModel.id,
|
||||
name: apiSpec?.name || apiModel.id,
|
||||
reasoning: isReasoning,
|
||||
input: hasVision ? ["text", "image"] : ["text"],
|
||||
cost: VENICE_DEFAULT_COST,
|
||||
contextWindow: apiModel.model_spec.availableContextTokens || 128000,
|
||||
maxTokens: 8192,
|
||||
contextWindow:
|
||||
normalizePositiveInt(apiSpec?.availableContextTokens) ?? VENICE_DEFAULT_CONTEXT_WINDOW,
|
||||
maxTokens: apiMaxTokens ?? VENICE_DEFAULT_MAX_TOKENS,
|
||||
// Avoid usage-only streaming chunks that can break OpenAI-compatible parsers.
|
||||
compat: {
|
||||
supportsUsageInStreaming: false,
|
||||
...(apiSupportsTools === false ? { supportsTools: false } : {}),
|
||||
},
|
||||
});
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user