From 5320ee7731676382398520fe94a01a94929a6587 Mon Sep 17 00:00:00 2001 From: Vincent Koc Date: Fri, 6 Mar 2026 19:07:11 -0500 Subject: [PATCH] fix(venice): harden discovery limits and tool support (#38306) * Config: add supportsTools compat flag * Agents: add model tool support helper * Venice: sync discovery and fallback metadata * Agents: skip tools for unsupported models * Changelog: note Venice provider hardening * Update CHANGELOG.md * Venice: cap degraded discovery metadata * Apply suggestion from @greptile-apps[bot] Co-authored-by: greptile-apps[bot] <165735046+greptile-apps[bot]@users.noreply.github.com> * Venice: tolerate partial discovery capabilities * Venice: tolerate missing discovery specs --------- Co-authored-by: greptile-apps[bot] <165735046+greptile-apps[bot]@users.noreply.github.com> --- CHANGELOG.md | 1 + src/agents/model-tool-support.test.ts | 16 + src/agents/model-tool-support.ts | 7 + src/agents/pi-embedded-runner/compact.ts | 6 +- src/agents/pi-embedded-runner/run/attempt.ts | 14 +- src/agents/venice-models.test.ts | 234 ++++++++++++ src/agents/venice-models.ts | 362 ++++++++++++++----- src/config/types.models.ts | 1 + src/config/zod-schema.core.ts | 1 + 9 files changed, 556 insertions(+), 86 deletions(-) create mode 100644 src/agents/model-tool-support.test.ts create mode 100644 src/agents/model-tool-support.ts diff --git a/CHANGELOG.md b/CHANGELOG.md index 6f1a55432a9..b1d0dfe4003 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -89,6 +89,7 @@ Docs: https://docs.openclaw.ai - Auto-reply/system events: restore runtime system events to the message timeline (`System:` lines), preserve think-hint parsing with prepended events, and carry events into deferred followup/collect/steer-backlog prompts to keep cache behavior stable without dropping queued metadata. (#34794) Thanks @anisoptera. - Security/audit account handling: avoid prototype-chain account IDs in audit validation by using own-property checks for `accounts`. (#34982) Thanks @HOYALIM. - Cron/restart catch-up semantics: replay interrupted recurring jobs and missed immediate cron slots on startup without replaying interrupted one-shot jobs, with guarded missed-slot probing to avoid malformed-schedule startup aborts and duplicate-trigger drift after restart. (from #34466, #34896, #34625, #33206) Thanks @dunamismax, @dsantoreis, @Octane0411, and @Sid-Qin. +- Venice/provider onboarding hardening: align per-model Venice completion-token limits with discovery metadata, clamp untrusted discovery values to safe bounds, sync the static Venice fallback catalog with current live model metadata, and disable tool wiring for Venice models that do not support function calling so default Venice setups no longer fail with `max_completion_tokens` or unsupported-tools 400s. Fixes #38168. Thanks @Sid-Qin, @powermaster888 and @vincentkoc. - Agents/session usage tracking: preserve accumulated usage metadata on embedded Pi runner error exits so failed turns still update session `totalTokens` from real usage instead of stale prior values. (#34275) thanks @RealKai42. - Slack/reaction thread context routing: carry Slack native DM channel IDs through inbound context and threading tool resolution so reaction targets resolve consistently for DM `To=user:*` sessions (including `toolContext.currentChannelId` fallback behavior). (from #34831; overlaps #34440, #34502, #34483, #32754) Thanks @dunamismax. - Subagents/announce completion scoping: scope nested direct-child completion aggregation to the current requester run window, harden frozen completion capture for deterministic descendant synthesis, and route completion announce delivery through parent-agent announce turns with provenance-aware internal events. (#35080) Thanks @tyler6204. diff --git a/src/agents/model-tool-support.test.ts b/src/agents/model-tool-support.test.ts new file mode 100644 index 00000000000..22fa511e892 --- /dev/null +++ b/src/agents/model-tool-support.test.ts @@ -0,0 +1,16 @@ +import { describe, expect, it } from "vitest"; +import { supportsModelTools } from "./model-tool-support.js"; + +describe("supportsModelTools", () => { + it("defaults to true when the model has no compat override", () => { + expect(supportsModelTools({} as never)).toBe(true); + }); + + it("returns true when compat.supportsTools is true", () => { + expect(supportsModelTools({ compat: { supportsTools: true } } as never)).toBe(true); + }); + + it("returns false when compat.supportsTools is false", () => { + expect(supportsModelTools({ compat: { supportsTools: false } } as never)).toBe(false); + }); +}); diff --git a/src/agents/model-tool-support.ts b/src/agents/model-tool-support.ts new file mode 100644 index 00000000000..2b68b6347b3 --- /dev/null +++ b/src/agents/model-tool-support.ts @@ -0,0 +1,7 @@ +export function supportsModelTools(model: { compat?: unknown }): boolean { + const compat = + model.compat && typeof model.compat === "object" + ? (model.compat as { supportsTools?: boolean }) + : undefined; + return compat?.supportsTools !== false; +} diff --git a/src/agents/pi-embedded-runner/compact.ts b/src/agents/pi-embedded-runner/compact.ts index 335c3a0e7d9..92bf4b97f23 100644 --- a/src/agents/pi-embedded-runner/compact.ts +++ b/src/agents/pi-embedded-runner/compact.ts @@ -38,6 +38,7 @@ import { formatUserTime, resolveUserTimeFormat, resolveUserTimezone } from "../d import { DEFAULT_CONTEXT_TOKENS, DEFAULT_MODEL, DEFAULT_PROVIDER } from "../defaults.js"; import { resolveOpenClawDocsPath } from "../docs-path.js"; import { getApiKeyForModel, resolveModelAuthMode } from "../model-auth.js"; +import { supportsModelTools } from "../model-tool-support.js"; import { ensureOpenClawModelsJson } from "../models-config.js"; import { resolveOwnerDisplaySetting } from "../owner-display.js"; import { @@ -400,7 +401,10 @@ export async function compactEmbeddedPiSessionDirect( modelContextWindowTokens: model.contextWindow, modelAuthMode: resolveModelAuthMode(model.provider, params.config), }); - const tools = sanitizeToolsForGoogle({ tools: toolsRaw, provider }); + const tools = sanitizeToolsForGoogle({ + tools: supportsModelTools(model) ? toolsRaw : [], + provider, + }); const allowedToolNames = collectAllowedToolNames({ tools }); logToolSchemasForGoogle({ tools, provider }); const machineName = await getMachineDisplayName(); diff --git a/src/agents/pi-embedded-runner/run/attempt.ts b/src/agents/pi-embedded-runner/run/attempt.ts index 61159c13357..e8bac7d6fba 100644 --- a/src/agents/pi-embedded-runner/run/attempt.ts +++ b/src/agents/pi-embedded-runner/run/attempt.ts @@ -49,6 +49,7 @@ import { isTimeoutError } from "../../failover-error.js"; import { resolveImageSanitizationLimits } from "../../image-sanitization.js"; import { resolveModelAuthMode } from "../../model-auth.js"; import { normalizeProviderId, resolveDefaultModelForAgent } from "../../model-selection.js"; +import { supportsModelTools } from "../../model-tool-support.js"; import { createOllamaStreamFn, OLLAMA_NATIVE_BASE_URL } from "../../ollama-stream.js"; import { createOpenAIWebSocketStreamFn, releaseWsSession } from "../../openai-ws-stream.js"; import { resolveOwnerDisplaySetting } from "../../owner-display.js"; @@ -878,10 +879,15 @@ export async function runEmbeddedAttempt( params.requireExplicitMessageTarget ?? isSubagentSessionKey(params.sessionKey), disableMessageTool: params.disableMessageTool, }); - const tools = sanitizeToolsForGoogle({ tools: toolsRaw, provider: params.provider }); + const toolsEnabled = supportsModelTools(params.model); + const tools = sanitizeToolsForGoogle({ + tools: toolsEnabled ? toolsRaw : [], + provider: params.provider, + }); + const clientTools = toolsEnabled ? params.clientTools : undefined; const allowedToolNames = collectAllowedToolNames({ tools, - clientTools: params.clientTools, + clientTools, }); logToolSchemasForGoogle({ tools, provider: params.provider }); @@ -1146,9 +1152,9 @@ export async function runEmbeddedAttempt( cfg: params.config, agentId: sessionAgentId, }); - const clientToolDefs = params.clientTools + const clientToolDefs = clientTools ? toClientToolDefinitions( - params.clientTools, + clientTools, (toolName, toolParams) => { clientToolCallDetected = { name: toolName, params: toolParams }; }, diff --git a/src/agents/venice-models.test.ts b/src/agents/venice-models.test.ts index 95fc7f61f8a..5a93568f9b7 100644 --- a/src/agents/venice-models.test.ts +++ b/src/agents/venice-models.test.ts @@ -42,6 +42,7 @@ function makeModelsResponse(id: string): Response { name: id, privacy: "private", availableContextTokens: 131072, + maxCompletionTokens: 4096, capabilities: { supportsReasoning: false, supportsVision: false, @@ -94,6 +95,239 @@ describe("venice-models", () => { expect(models.map((m) => m.id)).toContain("llama-3.3-70b"); }); + it("uses API maxCompletionTokens for catalog models when present", async () => { + const fetchMock = vi.fn( + async () => + new Response( + JSON.stringify({ + data: [ + { + id: "llama-3.3-70b", + model_spec: { + name: "llama-3.3-70b", + privacy: "private", + availableContextTokens: 131072, + maxCompletionTokens: 2048, + capabilities: { + supportsReasoning: false, + supportsVision: false, + supportsFunctionCalling: true, + }, + }, + }, + ], + }), + { + status: 200, + headers: { "Content-Type": "application/json" }, + }, + ), + ); + vi.stubGlobal("fetch", fetchMock as unknown as typeof fetch); + + const models = await runWithDiscoveryEnabled(() => discoverVeniceModels()); + const llama = models.find((m) => m.id === "llama-3.3-70b"); + expect(llama?.maxTokens).toBe(2048); + }); + + it("retains catalog maxTokens when the API omits maxCompletionTokens", async () => { + const fetchMock = vi.fn( + async () => + new Response( + JSON.stringify({ + data: [ + { + id: "qwen3-235b-a22b-instruct-2507", + model_spec: { + name: "qwen3-235b-a22b-instruct-2507", + privacy: "private", + availableContextTokens: 131072, + capabilities: { + supportsReasoning: false, + supportsVision: false, + supportsFunctionCalling: true, + }, + }, + }, + ], + }), + { + status: 200, + headers: { "Content-Type": "application/json" }, + }, + ), + ); + vi.stubGlobal("fetch", fetchMock as unknown as typeof fetch); + + const models = await runWithDiscoveryEnabled(() => discoverVeniceModels()); + const qwen = models.find((m) => m.id === "qwen3-235b-a22b-instruct-2507"); + expect(qwen?.maxTokens).toBe(16384); + }); + + it("disables tools for catalog models that do not support function calling", () => { + const model = buildVeniceModelDefinition( + VENICE_MODEL_CATALOG.find((entry) => entry.id === "deepseek-v3.2")!, + ); + expect(model.compat?.supportsTools).toBe(false); + }); + + it("uses a conservative bounded maxTokens value for new models", async () => { + const fetchMock = vi.fn( + async () => + new Response( + JSON.stringify({ + data: [ + { + id: "new-model-2026", + model_spec: { + name: "new-model-2026", + privacy: "private", + availableContextTokens: 50_000, + maxCompletionTokens: 200_000, + capabilities: { + supportsReasoning: false, + supportsVision: false, + supportsFunctionCalling: false, + }, + }, + }, + ], + }), + { + status: 200, + headers: { "Content-Type": "application/json" }, + }, + ), + ); + vi.stubGlobal("fetch", fetchMock as unknown as typeof fetch); + + const models = await runWithDiscoveryEnabled(() => discoverVeniceModels()); + const newModel = models.find((m) => m.id === "new-model-2026"); + expect(newModel?.maxTokens).toBe(50000); + expect(newModel?.maxTokens).toBeLessThanOrEqual(newModel?.contextWindow ?? Infinity); + expect(newModel?.compat?.supportsTools).toBe(false); + }); + + it("caps new-model maxTokens to the fallback context window when API context is missing", async () => { + const fetchMock = vi.fn( + async () => + new Response( + JSON.stringify({ + data: [ + { + id: "new-model-without-context", + model_spec: { + name: "new-model-without-context", + privacy: "private", + maxCompletionTokens: 200_000, + capabilities: { + supportsReasoning: false, + supportsVision: false, + supportsFunctionCalling: true, + }, + }, + }, + ], + }), + { + status: 200, + headers: { "Content-Type": "application/json" }, + }, + ), + ); + vi.stubGlobal("fetch", fetchMock as unknown as typeof fetch); + + const models = await runWithDiscoveryEnabled(() => discoverVeniceModels()); + const newModel = models.find((m) => m.id === "new-model-without-context"); + expect(newModel?.contextWindow).toBe(128000); + expect(newModel?.maxTokens).toBe(128000); + }); + + it("ignores missing capabilities on partial metadata instead of aborting discovery", async () => { + const fetchMock = vi.fn( + async () => + new Response( + JSON.stringify({ + data: [ + { + id: "llama-3.3-70b", + model_spec: { + name: "llama-3.3-70b", + privacy: "private", + availableContextTokens: 131072, + maxCompletionTokens: 2048, + }, + }, + { + id: "new-model-partial", + model_spec: { + name: "new-model-partial", + privacy: "private", + maxCompletionTokens: 2048, + }, + }, + ], + }), + { + status: 200, + headers: { "Content-Type": "application/json" }, + }, + ), + ); + vi.stubGlobal("fetch", fetchMock as unknown as typeof fetch); + + const models = await runWithDiscoveryEnabled(() => discoverVeniceModels()); + const knownModel = models.find((m) => m.id === "llama-3.3-70b"); + const partialModel = models.find((m) => m.id === "new-model-partial"); + expect(models).not.toHaveLength(VENICE_MODEL_CATALOG.length); + expect(knownModel?.maxTokens).toBe(2048); + expect(partialModel?.contextWindow).toBe(128000); + expect(partialModel?.maxTokens).toBe(2048); + expect(partialModel?.compat?.supportsTools).toBeUndefined(); + }); + + it("keeps known models discoverable when a row omits model_spec", async () => { + const fetchMock = vi.fn( + async () => + new Response( + JSON.stringify({ + data: [ + { + id: "llama-3.3-70b", + }, + { + id: "new-model-valid", + model_spec: { + name: "new-model-valid", + privacy: "private", + availableContextTokens: 32_000, + maxCompletionTokens: 2_048, + capabilities: { + supportsReasoning: false, + supportsVision: false, + supportsFunctionCalling: true, + }, + }, + }, + ], + }), + { + status: 200, + headers: { "Content-Type": "application/json" }, + }, + ), + ); + vi.stubGlobal("fetch", fetchMock as unknown as typeof fetch); + + const models = await runWithDiscoveryEnabled(() => discoverVeniceModels()); + const knownModel = models.find((m) => m.id === "llama-3.3-70b"); + const newModel = models.find((m) => m.id === "new-model-valid"); + expect(models).not.toHaveLength(VENICE_MODEL_CATALOG.length); + expect(knownModel?.maxTokens).toBe(4096); + expect(newModel?.contextWindow).toBe(32000); + expect(newModel?.maxTokens).toBe(2048); + }); + it("falls back to static catalog after retry budget is exhausted", async () => { const fetchMock = vi.fn(async () => { throw Object.assign(new TypeError("fetch failed"), { diff --git a/src/agents/venice-models.ts b/src/agents/venice-models.ts index b33b51c60a8..47dcf4d2f4d 100644 --- a/src/agents/venice-models.ts +++ b/src/agents/venice-models.ts @@ -17,6 +17,9 @@ export const VENICE_DEFAULT_COST = { cacheWrite: 0, }; +const VENICE_DEFAULT_CONTEXT_WINDOW = 128_000; +const VENICE_DEFAULT_MAX_TOKENS = 4096; +const VENICE_DISCOVERY_HARD_MAX_TOKENS = 131_072; const VENICE_DISCOVERY_TIMEOUT_MS = 10_000; const VENICE_DISCOVERY_RETRYABLE_HTTP_STATUS = new Set([408, 425, 429, 500, 502, 503, 504]); const VENICE_DISCOVERY_RETRYABLE_NETWORK_CODES = new Set([ @@ -59,8 +62,8 @@ export const VENICE_MODEL_CATALOG = [ name: "Llama 3.3 70B", reasoning: false, input: ["text"], - contextWindow: 131072, - maxTokens: 8192, + contextWindow: 128000, + maxTokens: 4096, privacy: "private", }, { @@ -68,8 +71,8 @@ export const VENICE_MODEL_CATALOG = [ name: "Llama 3.2 3B", reasoning: false, input: ["text"], - contextWindow: 131072, - maxTokens: 8192, + contextWindow: 128000, + maxTokens: 4096, privacy: "private", }, { @@ -77,8 +80,9 @@ export const VENICE_MODEL_CATALOG = [ name: "Hermes 3 Llama 3.1 405B", reasoning: false, input: ["text"], - contextWindow: 131072, - maxTokens: 8192, + contextWindow: 128000, + maxTokens: 16384, + supportsTools: false, privacy: "private", }, @@ -88,8 +92,8 @@ export const VENICE_MODEL_CATALOG = [ name: "Qwen3 235B Thinking", reasoning: true, input: ["text"], - contextWindow: 131072, - maxTokens: 8192, + contextWindow: 128000, + maxTokens: 16384, privacy: "private", }, { @@ -97,8 +101,8 @@ export const VENICE_MODEL_CATALOG = [ name: "Qwen3 235B Instruct", reasoning: false, input: ["text"], - contextWindow: 131072, - maxTokens: 8192, + contextWindow: 128000, + maxTokens: 16384, privacy: "private", }, { @@ -106,8 +110,26 @@ export const VENICE_MODEL_CATALOG = [ name: "Qwen3 Coder 480B", reasoning: false, input: ["text"], - contextWindow: 262144, - maxTokens: 8192, + contextWindow: 256000, + maxTokens: 65536, + privacy: "private", + }, + { + id: "qwen3-coder-480b-a35b-instruct-turbo", + name: "Qwen3 Coder 480B Turbo", + reasoning: false, + input: ["text"], + contextWindow: 256000, + maxTokens: 65536, + privacy: "private", + }, + { + id: "qwen3-5-35b-a3b", + name: "Qwen3.5 35B A3B", + reasoning: true, + input: ["text", "image"], + contextWindow: 256000, + maxTokens: 65536, privacy: "private", }, { @@ -115,8 +137,8 @@ export const VENICE_MODEL_CATALOG = [ name: "Qwen3 Next 80B", reasoning: false, input: ["text"], - contextWindow: 262144, - maxTokens: 8192, + contextWindow: 256000, + maxTokens: 16384, privacy: "private", }, { @@ -124,8 +146,8 @@ export const VENICE_MODEL_CATALOG = [ name: "Qwen3 VL 235B (Vision)", reasoning: false, input: ["text", "image"], - contextWindow: 262144, - maxTokens: 8192, + contextWindow: 256000, + maxTokens: 16384, privacy: "private", }, { @@ -133,8 +155,8 @@ export const VENICE_MODEL_CATALOG = [ name: "Venice Small (Qwen3 4B)", reasoning: true, input: ["text"], - contextWindow: 32768, - maxTokens: 8192, + contextWindow: 32000, + maxTokens: 4096, privacy: "private", }, @@ -144,8 +166,9 @@ export const VENICE_MODEL_CATALOG = [ name: "DeepSeek V3.2", reasoning: true, input: ["text"], - contextWindow: 163840, - maxTokens: 8192, + contextWindow: 160000, + maxTokens: 32768, + supportsTools: false, privacy: "private", }, @@ -155,8 +178,9 @@ export const VENICE_MODEL_CATALOG = [ name: "Venice Uncensored (Dolphin-Mistral)", reasoning: false, input: ["text"], - contextWindow: 32768, - maxTokens: 8192, + contextWindow: 32000, + maxTokens: 4096, + supportsTools: false, privacy: "private", }, { @@ -164,8 +188,8 @@ export const VENICE_MODEL_CATALOG = [ name: "Venice Medium (Mistral)", reasoning: false, input: ["text", "image"], - contextWindow: 131072, - maxTokens: 8192, + contextWindow: 128000, + maxTokens: 4096, privacy: "private", }, @@ -175,8 +199,8 @@ export const VENICE_MODEL_CATALOG = [ name: "Google Gemma 3 27B Instruct", reasoning: false, input: ["text", "image"], - contextWindow: 202752, - maxTokens: 8192, + contextWindow: 198000, + maxTokens: 16384, privacy: "private", }, { @@ -184,8 +208,35 @@ export const VENICE_MODEL_CATALOG = [ name: "OpenAI GPT OSS 120B", reasoning: false, input: ["text"], - contextWindow: 131072, - maxTokens: 8192, + contextWindow: 128000, + maxTokens: 16384, + privacy: "private", + }, + { + id: "nvidia-nemotron-3-nano-30b-a3b", + name: "NVIDIA Nemotron 3 Nano 30B", + reasoning: false, + input: ["text"], + contextWindow: 128000, + maxTokens: 16384, + privacy: "private", + }, + { + id: "olafangensan-glm-4.7-flash-heretic", + name: "GLM 4.7 Flash Heretic", + reasoning: true, + input: ["text"], + contextWindow: 128000, + maxTokens: 24000, + privacy: "private", + }, + { + id: "zai-org-glm-4.6", + name: "GLM 4.6", + reasoning: false, + input: ["text"], + contextWindow: 198000, + maxTokens: 16384, privacy: "private", }, { @@ -193,8 +244,62 @@ export const VENICE_MODEL_CATALOG = [ name: "GLM 4.7", reasoning: true, input: ["text"], - contextWindow: 202752, - maxTokens: 8192, + contextWindow: 198000, + maxTokens: 16384, + privacy: "private", + }, + { + id: "zai-org-glm-4.7-flash", + name: "GLM 4.7 Flash", + reasoning: true, + input: ["text"], + contextWindow: 128000, + maxTokens: 16384, + privacy: "private", + }, + { + id: "zai-org-glm-5", + name: "GLM 5", + reasoning: true, + input: ["text"], + contextWindow: 198000, + maxTokens: 32000, + privacy: "private", + }, + { + id: "kimi-k2-5", + name: "Kimi K2.5", + reasoning: true, + input: ["text", "image"], + contextWindow: 256000, + maxTokens: 65536, + privacy: "private", + }, + { + id: "kimi-k2-thinking", + name: "Kimi K2 Thinking", + reasoning: true, + input: ["text"], + contextWindow: 256000, + maxTokens: 65536, + privacy: "private", + }, + { + id: "minimax-m21", + name: "MiniMax M2.1", + reasoning: true, + input: ["text"], + contextWindow: 198000, + maxTokens: 32768, + privacy: "private", + }, + { + id: "minimax-m25", + name: "MiniMax M2.5", + reasoning: true, + input: ["text"], + contextWindow: 198000, + maxTokens: 32768, privacy: "private", }, @@ -205,21 +310,39 @@ export const VENICE_MODEL_CATALOG = [ // Anthropic (via Venice) { - id: "claude-opus-45", + id: "claude-opus-4-5", name: "Claude Opus 4.5 (via Venice)", reasoning: true, input: ["text", "image"], - contextWindow: 202752, - maxTokens: 8192, + contextWindow: 198000, + maxTokens: 32768, privacy: "anonymized", }, { - id: "claude-sonnet-45", + id: "claude-opus-4-6", + name: "Claude Opus 4.6 (via Venice)", + reasoning: true, + input: ["text", "image"], + contextWindow: 1000000, + maxTokens: 128000, + privacy: "anonymized", + }, + { + id: "claude-sonnet-4-5", name: "Claude Sonnet 4.5 (via Venice)", reasoning: true, input: ["text", "image"], - contextWindow: 202752, - maxTokens: 8192, + contextWindow: 198000, + maxTokens: 64000, + privacy: "anonymized", + }, + { + id: "claude-sonnet-4-6", + name: "Claude Sonnet 4.6 (via Venice)", + reasoning: true, + input: ["text", "image"], + contextWindow: 1000000, + maxTokens: 64000, privacy: "anonymized", }, @@ -229,8 +352,8 @@ export const VENICE_MODEL_CATALOG = [ name: "GPT-5.2 (via Venice)", reasoning: true, input: ["text"], - contextWindow: 262144, - maxTokens: 8192, + contextWindow: 256000, + maxTokens: 65536, privacy: "anonymized", }, { @@ -238,8 +361,44 @@ export const VENICE_MODEL_CATALOG = [ name: "GPT-5.2 Codex (via Venice)", reasoning: true, input: ["text", "image"], - contextWindow: 262144, - maxTokens: 8192, + contextWindow: 256000, + maxTokens: 65536, + privacy: "anonymized", + }, + { + id: "openai-gpt-53-codex", + name: "GPT-5.3 Codex (via Venice)", + reasoning: true, + input: ["text", "image"], + contextWindow: 400000, + maxTokens: 128000, + privacy: "anonymized", + }, + { + id: "openai-gpt-54", + name: "GPT-5.4 (via Venice)", + reasoning: true, + input: ["text", "image"], + contextWindow: 1000000, + maxTokens: 131072, + privacy: "anonymized", + }, + { + id: "openai-gpt-4o-2024-11-20", + name: "GPT-4o (via Venice)", + reasoning: false, + input: ["text", "image"], + contextWindow: 128000, + maxTokens: 16384, + privacy: "anonymized", + }, + { + id: "openai-gpt-4o-mini-2024-07-18", + name: "GPT-4o Mini (via Venice)", + reasoning: false, + input: ["text", "image"], + contextWindow: 128000, + maxTokens: 16384, privacy: "anonymized", }, @@ -249,8 +408,17 @@ export const VENICE_MODEL_CATALOG = [ name: "Gemini 3 Pro (via Venice)", reasoning: true, input: ["text", "image"], - contextWindow: 202752, - maxTokens: 8192, + contextWindow: 198000, + maxTokens: 32768, + privacy: "anonymized", + }, + { + id: "gemini-3-1-pro-preview", + name: "Gemini 3.1 Pro (via Venice)", + reasoning: true, + input: ["text", "image"], + contextWindow: 1000000, + maxTokens: 32768, privacy: "anonymized", }, { @@ -258,8 +426,8 @@ export const VENICE_MODEL_CATALOG = [ name: "Gemini 3 Flash (via Venice)", reasoning: true, input: ["text", "image"], - contextWindow: 262144, - maxTokens: 8192, + contextWindow: 256000, + maxTokens: 65536, privacy: "anonymized", }, @@ -269,8 +437,8 @@ export const VENICE_MODEL_CATALOG = [ name: "Grok 4.1 Fast (via Venice)", reasoning: true, input: ["text", "image"], - contextWindow: 262144, - maxTokens: 8192, + contextWindow: 1000000, + maxTokens: 30000, privacy: "anonymized", }, { @@ -278,28 +446,8 @@ export const VENICE_MODEL_CATALOG = [ name: "Grok Code Fast 1 (via Venice)", reasoning: true, input: ["text"], - contextWindow: 262144, - maxTokens: 8192, - privacy: "anonymized", - }, - - // Other anonymized models - { - id: "kimi-k2-thinking", - name: "Kimi K2 Thinking (via Venice)", - reasoning: true, - input: ["text"], - contextWindow: 262144, - maxTokens: 8192, - privacy: "anonymized", - }, - { - id: "minimax-m21", - name: "MiniMax M2.5 (via Venice)", - reasoning: true, - input: ["text"], - contextWindow: 202752, - maxTokens: 8192, + contextWindow: 256000, + maxTokens: 10000, privacy: "anonymized", }, ] as const; @@ -326,6 +474,7 @@ export function buildVeniceModelDefinition(entry: VeniceCatalogEntry): ModelDefi // See: https://github.com/openclaw/openclaw/issues/15819 compat: { supportsUsageInStreaming: false, + ...("supportsTools" in entry && !entry.supportsTools ? { supportsTools: false } : {}), }, }; } @@ -334,17 +483,18 @@ export function buildVeniceModelDefinition(entry: VeniceCatalogEntry): ModelDefi interface VeniceModelSpec { name: string; privacy: "private" | "anonymized"; - availableContextTokens: number; - capabilities: { - supportsReasoning: boolean; - supportsVision: boolean; - supportsFunctionCalling: boolean; + availableContextTokens?: number; + maxCompletionTokens?: number; + capabilities?: { + supportsReasoning?: boolean; + supportsVision?: boolean; + supportsFunctionCalling?: boolean; }; } interface VeniceModel { id: string; - model_spec: VeniceModelSpec; + model_spec?: VeniceModelSpec; } interface VeniceModelsResponse { @@ -412,6 +562,36 @@ function isRetryableVeniceDiscoveryError(err: unknown): boolean { return hasRetryableNetworkCode(err); } +function normalizePositiveInt(value: unknown): number | undefined { + if (typeof value !== "number" || !Number.isFinite(value) || value <= 0) { + return undefined; + } + return Math.floor(value); +} + +function resolveApiMaxCompletionTokens(params: { + apiModel: VeniceModel; + knownMaxTokens?: number; +}): number | undefined { + const raw = normalizePositiveInt(params.apiModel.model_spec?.maxCompletionTokens); + if (!raw) { + return undefined; + } + const contextWindow = normalizePositiveInt(params.apiModel.model_spec?.availableContextTokens); + const knownMaxTokens = + typeof params.knownMaxTokens === "number" && Number.isFinite(params.knownMaxTokens) + ? Math.floor(params.knownMaxTokens) + : undefined; + const hardCap = knownMaxTokens ?? VENICE_DISCOVERY_HARD_MAX_TOKENS; + const fallbackContextWindow = knownMaxTokens ?? VENICE_DEFAULT_CONTEXT_WINDOW; + return Math.min(raw, contextWindow ?? fallbackContextWindow, hardCap); +} + +function resolveApiSupportsTools(apiModel: VeniceModel): boolean | undefined { + const supportsFunctionCalling = apiModel.model_spec?.capabilities?.supportsFunctionCalling; + return typeof supportsFunctionCalling === "boolean" ? supportsFunctionCalling : undefined; +} + /** * Discover models from Venice API with fallback to static catalog. * The /models endpoint is public and doesn't require authentication. @@ -468,30 +648,50 @@ export async function discoverVeniceModels(): Promise { for (const apiModel of data.data) { const catalogEntry = catalogById.get(apiModel.id); + const apiMaxTokens = resolveApiMaxCompletionTokens({ + apiModel, + knownMaxTokens: catalogEntry?.maxTokens, + }); + const apiSupportsTools = resolveApiSupportsTools(apiModel); if (catalogEntry) { - // Use catalog metadata for known models - models.push(buildVeniceModelDefinition(catalogEntry)); + const definition = buildVeniceModelDefinition(catalogEntry); + if (apiMaxTokens !== undefined) { + definition.maxTokens = apiMaxTokens; + } + // We only let live discovery disable tools. Re-enabling tool support still + // requires a catalog update so a transient/bad /models response cannot + // silently expand the tool execution surface for known models. + if (apiSupportsTools === false) { + definition.compat = { + ...definition.compat, + supportsTools: false, + }; + } + models.push(definition); } else { // Create definition for newly discovered models not in catalog + const apiSpec = apiModel.model_spec; const isReasoning = - apiModel.model_spec.capabilities.supportsReasoning || + apiSpec?.capabilities?.supportsReasoning || apiModel.id.toLowerCase().includes("thinking") || apiModel.id.toLowerCase().includes("reason") || apiModel.id.toLowerCase().includes("r1"); - const hasVision = apiModel.model_spec.capabilities.supportsVision; + const hasVision = apiSpec?.capabilities?.supportsVision === true; models.push({ id: apiModel.id, - name: apiModel.model_spec.name || apiModel.id, + name: apiSpec?.name || apiModel.id, reasoning: isReasoning, input: hasVision ? ["text", "image"] : ["text"], cost: VENICE_DEFAULT_COST, - contextWindow: apiModel.model_spec.availableContextTokens || 128000, - maxTokens: 8192, + contextWindow: + normalizePositiveInt(apiSpec?.availableContextTokens) ?? VENICE_DEFAULT_CONTEXT_WINDOW, + maxTokens: apiMaxTokens ?? VENICE_DEFAULT_MAX_TOKENS, // Avoid usage-only streaming chunks that can break OpenAI-compatible parsers. compat: { supportsUsageInStreaming: false, + ...(apiSupportsTools === false ? { supportsTools: false } : {}), }, }); } diff --git a/src/config/types.models.ts b/src/config/types.models.ts index 6e7e9efe5f0..4ef646cc48a 100644 --- a/src/config/types.models.ts +++ b/src/config/types.models.ts @@ -18,6 +18,7 @@ export type ModelCompatConfig = { supportsDeveloperRole?: boolean; supportsReasoningEffort?: boolean; supportsUsageInStreaming?: boolean; + supportsTools?: boolean; supportsStrictMode?: boolean; maxTokensField?: "max_completion_tokens" | "max_tokens"; thinkingFormat?: "openai" | "zai" | "qwen"; diff --git a/src/config/zod-schema.core.ts b/src/config/zod-schema.core.ts index 48c4429940b..733917e4dac 100644 --- a/src/config/zod-schema.core.ts +++ b/src/config/zod-schema.core.ts @@ -188,6 +188,7 @@ export const ModelCompatSchema = z supportsDeveloperRole: z.boolean().optional(), supportsReasoningEffort: z.boolean().optional(), supportsUsageInStreaming: z.boolean().optional(), + supportsTools: z.boolean().optional(), supportsStrictMode: z.boolean().optional(), maxTokensField: z .union([z.literal("max_completion_tokens"), z.literal("max_tokens")])