fix(venice): harden discovery limits and tool support (#38306)

* Config: add supportsTools compat flag

* Agents: add model tool support helper

* Venice: sync discovery and fallback metadata

* Agents: skip tools for unsupported models

* Changelog: note Venice provider hardening

* Update CHANGELOG.md

* Venice: cap degraded discovery metadata

* Apply suggestion from @greptile-apps[bot]

Co-authored-by: greptile-apps[bot] <165735046+greptile-apps[bot]@users.noreply.github.com>

* Venice: tolerate partial discovery capabilities

* Venice: tolerate missing discovery specs

---------

Co-authored-by: greptile-apps[bot] <165735046+greptile-apps[bot]@users.noreply.github.com>
This commit is contained in:
Vincent Koc
2026-03-06 19:07:11 -05:00
committed by GitHub
parent 942c53e7f0
commit 5320ee7731
9 changed files with 556 additions and 86 deletions

View File

@@ -0,0 +1,16 @@
import { describe, expect, it } from "vitest";
import { supportsModelTools } from "./model-tool-support.js";
describe("supportsModelTools", () => {
it("defaults to true when the model has no compat override", () => {
expect(supportsModelTools({} as never)).toBe(true);
});
it("returns true when compat.supportsTools is true", () => {
expect(supportsModelTools({ compat: { supportsTools: true } } as never)).toBe(true);
});
it("returns false when compat.supportsTools is false", () => {
expect(supportsModelTools({ compat: { supportsTools: false } } as never)).toBe(false);
});
});

View File

@@ -0,0 +1,7 @@
/**
 * Whether a model may be offered tools.
 *
 * Tool support is opt-out: every model is treated as tool-capable unless its
 * `compat` override explicitly sets `supportsTools: false`. Any malformed
 * `compat` value (missing, null, or a non-object) falls back to the default.
 *
 * @param model Model definition; only its optional `compat` bag is inspected.
 * @returns false only for an explicit `compat.supportsTools === false`.
 */
export function supportsModelTools(model: { compat?: unknown }): boolean {
  const { compat } = model;
  if (compat === null || compat === undefined || typeof compat !== "object") {
    return true;
  }
  return (compat as { supportsTools?: boolean }).supportsTools !== false;
}

View File

@@ -38,6 +38,7 @@ import { formatUserTime, resolveUserTimeFormat, resolveUserTimezone } from "../d
import { DEFAULT_CONTEXT_TOKENS, DEFAULT_MODEL, DEFAULT_PROVIDER } from "../defaults.js";
import { resolveOpenClawDocsPath } from "../docs-path.js";
import { getApiKeyForModel, resolveModelAuthMode } from "../model-auth.js";
import { supportsModelTools } from "../model-tool-support.js";
import { ensureOpenClawModelsJson } from "../models-config.js";
import { resolveOwnerDisplaySetting } from "../owner-display.js";
import {
@@ -400,7 +401,10 @@ export async function compactEmbeddedPiSessionDirect(
modelContextWindowTokens: model.contextWindow,
modelAuthMode: resolveModelAuthMode(model.provider, params.config),
});
const tools = sanitizeToolsForGoogle({ tools: toolsRaw, provider });
const tools = sanitizeToolsForGoogle({
tools: supportsModelTools(model) ? toolsRaw : [],
provider,
});
const allowedToolNames = collectAllowedToolNames({ tools });
logToolSchemasForGoogle({ tools, provider });
const machineName = await getMachineDisplayName();

View File

@@ -49,6 +49,7 @@ import { isTimeoutError } from "../../failover-error.js";
import { resolveImageSanitizationLimits } from "../../image-sanitization.js";
import { resolveModelAuthMode } from "../../model-auth.js";
import { normalizeProviderId, resolveDefaultModelForAgent } from "../../model-selection.js";
import { supportsModelTools } from "../../model-tool-support.js";
import { createOllamaStreamFn, OLLAMA_NATIVE_BASE_URL } from "../../ollama-stream.js";
import { createOpenAIWebSocketStreamFn, releaseWsSession } from "../../openai-ws-stream.js";
import { resolveOwnerDisplaySetting } from "../../owner-display.js";
@@ -878,10 +879,15 @@ export async function runEmbeddedAttempt(
params.requireExplicitMessageTarget ?? isSubagentSessionKey(params.sessionKey),
disableMessageTool: params.disableMessageTool,
});
const tools = sanitizeToolsForGoogle({ tools: toolsRaw, provider: params.provider });
const toolsEnabled = supportsModelTools(params.model);
const tools = sanitizeToolsForGoogle({
tools: toolsEnabled ? toolsRaw : [],
provider: params.provider,
});
const clientTools = toolsEnabled ? params.clientTools : undefined;
const allowedToolNames = collectAllowedToolNames({
tools,
clientTools: params.clientTools,
clientTools,
});
logToolSchemasForGoogle({ tools, provider: params.provider });
@@ -1146,9 +1152,9 @@ export async function runEmbeddedAttempt(
cfg: params.config,
agentId: sessionAgentId,
});
const clientToolDefs = params.clientTools
const clientToolDefs = clientTools
? toClientToolDefinitions(
params.clientTools,
clientTools,
(toolName, toolParams) => {
clientToolCallDetected = { name: toolName, params: toolParams };
},

View File

@@ -42,6 +42,7 @@ function makeModelsResponse(id: string): Response {
name: id,
privacy: "private",
availableContextTokens: 131072,
maxCompletionTokens: 4096,
capabilities: {
supportsReasoning: false,
supportsVision: false,
@@ -94,6 +95,239 @@ describe("venice-models", () => {
expect(models.map((m) => m.id)).toContain("llama-3.3-70b");
});
it("uses API maxCompletionTokens for catalog models when present", async () => {
const fetchMock = vi.fn(
async () =>
new Response(
JSON.stringify({
data: [
{
id: "llama-3.3-70b",
model_spec: {
name: "llama-3.3-70b",
privacy: "private",
availableContextTokens: 131072,
maxCompletionTokens: 2048,
capabilities: {
supportsReasoning: false,
supportsVision: false,
supportsFunctionCalling: true,
},
},
},
],
}),
{
status: 200,
headers: { "Content-Type": "application/json" },
},
),
);
vi.stubGlobal("fetch", fetchMock as unknown as typeof fetch);
const models = await runWithDiscoveryEnabled(() => discoverVeniceModels());
const llama = models.find((m) => m.id === "llama-3.3-70b");
expect(llama?.maxTokens).toBe(2048);
});
it("retains catalog maxTokens when the API omits maxCompletionTokens", async () => {
const fetchMock = vi.fn(
async () =>
new Response(
JSON.stringify({
data: [
{
id: "qwen3-235b-a22b-instruct-2507",
model_spec: {
name: "qwen3-235b-a22b-instruct-2507",
privacy: "private",
availableContextTokens: 131072,
capabilities: {
supportsReasoning: false,
supportsVision: false,
supportsFunctionCalling: true,
},
},
},
],
}),
{
status: 200,
headers: { "Content-Type": "application/json" },
},
),
);
vi.stubGlobal("fetch", fetchMock as unknown as typeof fetch);
const models = await runWithDiscoveryEnabled(() => discoverVeniceModels());
const qwen = models.find((m) => m.id === "qwen3-235b-a22b-instruct-2507");
expect(qwen?.maxTokens).toBe(16384);
});
it("disables tools for catalog models that do not support function calling", () => {
const model = buildVeniceModelDefinition(
VENICE_MODEL_CATALOG.find((entry) => entry.id === "deepseek-v3.2")!,
);
expect(model.compat?.supportsTools).toBe(false);
});
it("uses a conservative bounded maxTokens value for new models", async () => {
const fetchMock = vi.fn(
async () =>
new Response(
JSON.stringify({
data: [
{
id: "new-model-2026",
model_spec: {
name: "new-model-2026",
privacy: "private",
availableContextTokens: 50_000,
maxCompletionTokens: 200_000,
capabilities: {
supportsReasoning: false,
supportsVision: false,
supportsFunctionCalling: false,
},
},
},
],
}),
{
status: 200,
headers: { "Content-Type": "application/json" },
},
),
);
vi.stubGlobal("fetch", fetchMock as unknown as typeof fetch);
const models = await runWithDiscoveryEnabled(() => discoverVeniceModels());
const newModel = models.find((m) => m.id === "new-model-2026");
expect(newModel?.maxTokens).toBe(50000);
expect(newModel?.maxTokens).toBeLessThanOrEqual(newModel?.contextWindow ?? Infinity);
expect(newModel?.compat?.supportsTools).toBe(false);
});
it("caps new-model maxTokens to the fallback context window when API context is missing", async () => {
const fetchMock = vi.fn(
async () =>
new Response(
JSON.stringify({
data: [
{
id: "new-model-without-context",
model_spec: {
name: "new-model-without-context",
privacy: "private",
maxCompletionTokens: 200_000,
capabilities: {
supportsReasoning: false,
supportsVision: false,
supportsFunctionCalling: true,
},
},
},
],
}),
{
status: 200,
headers: { "Content-Type": "application/json" },
},
),
);
vi.stubGlobal("fetch", fetchMock as unknown as typeof fetch);
const models = await runWithDiscoveryEnabled(() => discoverVeniceModels());
const newModel = models.find((m) => m.id === "new-model-without-context");
expect(newModel?.contextWindow).toBe(128000);
expect(newModel?.maxTokens).toBe(128000);
});
it("ignores missing capabilities on partial metadata instead of aborting discovery", async () => {
const fetchMock = vi.fn(
async () =>
new Response(
JSON.stringify({
data: [
{
id: "llama-3.3-70b",
model_spec: {
name: "llama-3.3-70b",
privacy: "private",
availableContextTokens: 131072,
maxCompletionTokens: 2048,
},
},
{
id: "new-model-partial",
model_spec: {
name: "new-model-partial",
privacy: "private",
maxCompletionTokens: 2048,
},
},
],
}),
{
status: 200,
headers: { "Content-Type": "application/json" },
},
),
);
vi.stubGlobal("fetch", fetchMock as unknown as typeof fetch);
const models = await runWithDiscoveryEnabled(() => discoverVeniceModels());
const knownModel = models.find((m) => m.id === "llama-3.3-70b");
const partialModel = models.find((m) => m.id === "new-model-partial");
expect(models).not.toHaveLength(VENICE_MODEL_CATALOG.length);
expect(knownModel?.maxTokens).toBe(2048);
expect(partialModel?.contextWindow).toBe(128000);
expect(partialModel?.maxTokens).toBe(2048);
expect(partialModel?.compat?.supportsTools).toBeUndefined();
});
it("keeps known models discoverable when a row omits model_spec", async () => {
const fetchMock = vi.fn(
async () =>
new Response(
JSON.stringify({
data: [
{
id: "llama-3.3-70b",
},
{
id: "new-model-valid",
model_spec: {
name: "new-model-valid",
privacy: "private",
availableContextTokens: 32_000,
maxCompletionTokens: 2_048,
capabilities: {
supportsReasoning: false,
supportsVision: false,
supportsFunctionCalling: true,
},
},
},
],
}),
{
status: 200,
headers: { "Content-Type": "application/json" },
},
),
);
vi.stubGlobal("fetch", fetchMock as unknown as typeof fetch);
const models = await runWithDiscoveryEnabled(() => discoverVeniceModels());
const knownModel = models.find((m) => m.id === "llama-3.3-70b");
const newModel = models.find((m) => m.id === "new-model-valid");
expect(models).not.toHaveLength(VENICE_MODEL_CATALOG.length);
expect(knownModel?.maxTokens).toBe(4096);
expect(newModel?.contextWindow).toBe(32000);
expect(newModel?.maxTokens).toBe(2048);
});
it("falls back to static catalog after retry budget is exhausted", async () => {
const fetchMock = vi.fn(async () => {
throw Object.assign(new TypeError("fetch failed"), {

View File

@@ -17,6 +17,9 @@ export const VENICE_DEFAULT_COST = {
cacheWrite: 0,
};
const VENICE_DEFAULT_CONTEXT_WINDOW = 128_000;
const VENICE_DEFAULT_MAX_TOKENS = 4096;
const VENICE_DISCOVERY_HARD_MAX_TOKENS = 131_072;
const VENICE_DISCOVERY_TIMEOUT_MS = 10_000;
const VENICE_DISCOVERY_RETRYABLE_HTTP_STATUS = new Set([408, 425, 429, 500, 502, 503, 504]);
const VENICE_DISCOVERY_RETRYABLE_NETWORK_CODES = new Set([
@@ -59,8 +62,8 @@ export const VENICE_MODEL_CATALOG = [
name: "Llama 3.3 70B",
reasoning: false,
input: ["text"],
contextWindow: 131072,
maxTokens: 8192,
contextWindow: 128000,
maxTokens: 4096,
privacy: "private",
},
{
@@ -68,8 +71,8 @@ export const VENICE_MODEL_CATALOG = [
name: "Llama 3.2 3B",
reasoning: false,
input: ["text"],
contextWindow: 131072,
maxTokens: 8192,
contextWindow: 128000,
maxTokens: 4096,
privacy: "private",
},
{
@@ -77,8 +80,9 @@ export const VENICE_MODEL_CATALOG = [
name: "Hermes 3 Llama 3.1 405B",
reasoning: false,
input: ["text"],
contextWindow: 131072,
maxTokens: 8192,
contextWindow: 128000,
maxTokens: 16384,
supportsTools: false,
privacy: "private",
},
@@ -88,8 +92,8 @@ export const VENICE_MODEL_CATALOG = [
name: "Qwen3 235B Thinking",
reasoning: true,
input: ["text"],
contextWindow: 131072,
maxTokens: 8192,
contextWindow: 128000,
maxTokens: 16384,
privacy: "private",
},
{
@@ -97,8 +101,8 @@ export const VENICE_MODEL_CATALOG = [
name: "Qwen3 235B Instruct",
reasoning: false,
input: ["text"],
contextWindow: 131072,
maxTokens: 8192,
contextWindow: 128000,
maxTokens: 16384,
privacy: "private",
},
{
@@ -106,8 +110,26 @@ export const VENICE_MODEL_CATALOG = [
name: "Qwen3 Coder 480B",
reasoning: false,
input: ["text"],
contextWindow: 262144,
maxTokens: 8192,
contextWindow: 256000,
maxTokens: 65536,
privacy: "private",
},
{
id: "qwen3-coder-480b-a35b-instruct-turbo",
name: "Qwen3 Coder 480B Turbo",
reasoning: false,
input: ["text"],
contextWindow: 256000,
maxTokens: 65536,
privacy: "private",
},
{
id: "qwen3-5-35b-a3b",
name: "Qwen3.5 35B A3B",
reasoning: true,
input: ["text", "image"],
contextWindow: 256000,
maxTokens: 65536,
privacy: "private",
},
{
@@ -115,8 +137,8 @@ export const VENICE_MODEL_CATALOG = [
name: "Qwen3 Next 80B",
reasoning: false,
input: ["text"],
contextWindow: 262144,
maxTokens: 8192,
contextWindow: 256000,
maxTokens: 16384,
privacy: "private",
},
{
@@ -124,8 +146,8 @@ export const VENICE_MODEL_CATALOG = [
name: "Qwen3 VL 235B (Vision)",
reasoning: false,
input: ["text", "image"],
contextWindow: 262144,
maxTokens: 8192,
contextWindow: 256000,
maxTokens: 16384,
privacy: "private",
},
{
@@ -133,8 +155,8 @@ export const VENICE_MODEL_CATALOG = [
name: "Venice Small (Qwen3 4B)",
reasoning: true,
input: ["text"],
contextWindow: 32768,
maxTokens: 8192,
contextWindow: 32000,
maxTokens: 4096,
privacy: "private",
},
@@ -144,8 +166,9 @@ export const VENICE_MODEL_CATALOG = [
name: "DeepSeek V3.2",
reasoning: true,
input: ["text"],
contextWindow: 163840,
maxTokens: 8192,
contextWindow: 160000,
maxTokens: 32768,
supportsTools: false,
privacy: "private",
},
@@ -155,8 +178,9 @@ export const VENICE_MODEL_CATALOG = [
name: "Venice Uncensored (Dolphin-Mistral)",
reasoning: false,
input: ["text"],
contextWindow: 32768,
maxTokens: 8192,
contextWindow: 32000,
maxTokens: 4096,
supportsTools: false,
privacy: "private",
},
{
@@ -164,8 +188,8 @@ export const VENICE_MODEL_CATALOG = [
name: "Venice Medium (Mistral)",
reasoning: false,
input: ["text", "image"],
contextWindow: 131072,
maxTokens: 8192,
contextWindow: 128000,
maxTokens: 4096,
privacy: "private",
},
@@ -175,8 +199,8 @@ export const VENICE_MODEL_CATALOG = [
name: "Google Gemma 3 27B Instruct",
reasoning: false,
input: ["text", "image"],
contextWindow: 202752,
maxTokens: 8192,
contextWindow: 198000,
maxTokens: 16384,
privacy: "private",
},
{
@@ -184,8 +208,35 @@ export const VENICE_MODEL_CATALOG = [
name: "OpenAI GPT OSS 120B",
reasoning: false,
input: ["text"],
contextWindow: 131072,
maxTokens: 8192,
contextWindow: 128000,
maxTokens: 16384,
privacy: "private",
},
{
id: "nvidia-nemotron-3-nano-30b-a3b",
name: "NVIDIA Nemotron 3 Nano 30B",
reasoning: false,
input: ["text"],
contextWindow: 128000,
maxTokens: 16384,
privacy: "private",
},
{
id: "olafangensan-glm-4.7-flash-heretic",
name: "GLM 4.7 Flash Heretic",
reasoning: true,
input: ["text"],
contextWindow: 128000,
maxTokens: 24000,
privacy: "private",
},
{
id: "zai-org-glm-4.6",
name: "GLM 4.6",
reasoning: false,
input: ["text"],
contextWindow: 198000,
maxTokens: 16384,
privacy: "private",
},
{
@@ -193,8 +244,62 @@ export const VENICE_MODEL_CATALOG = [
name: "GLM 4.7",
reasoning: true,
input: ["text"],
contextWindow: 202752,
maxTokens: 8192,
contextWindow: 198000,
maxTokens: 16384,
privacy: "private",
},
{
id: "zai-org-glm-4.7-flash",
name: "GLM 4.7 Flash",
reasoning: true,
input: ["text"],
contextWindow: 128000,
maxTokens: 16384,
privacy: "private",
},
{
id: "zai-org-glm-5",
name: "GLM 5",
reasoning: true,
input: ["text"],
contextWindow: 198000,
maxTokens: 32000,
privacy: "private",
},
{
id: "kimi-k2-5",
name: "Kimi K2.5",
reasoning: true,
input: ["text", "image"],
contextWindow: 256000,
maxTokens: 65536,
privacy: "private",
},
{
id: "kimi-k2-thinking",
name: "Kimi K2 Thinking",
reasoning: true,
input: ["text"],
contextWindow: 256000,
maxTokens: 65536,
privacy: "private",
},
{
id: "minimax-m21",
name: "MiniMax M2.1",
reasoning: true,
input: ["text"],
contextWindow: 198000,
maxTokens: 32768,
privacy: "private",
},
{
id: "minimax-m25",
name: "MiniMax M2.5",
reasoning: true,
input: ["text"],
contextWindow: 198000,
maxTokens: 32768,
privacy: "private",
},
@@ -205,21 +310,39 @@ export const VENICE_MODEL_CATALOG = [
// Anthropic (via Venice)
{
id: "claude-opus-45",
id: "claude-opus-4-5",
name: "Claude Opus 4.5 (via Venice)",
reasoning: true,
input: ["text", "image"],
contextWindow: 202752,
maxTokens: 8192,
contextWindow: 198000,
maxTokens: 32768,
privacy: "anonymized",
},
{
id: "claude-sonnet-45",
id: "claude-opus-4-6",
name: "Claude Opus 4.6 (via Venice)",
reasoning: true,
input: ["text", "image"],
contextWindow: 1000000,
maxTokens: 128000,
privacy: "anonymized",
},
{
id: "claude-sonnet-4-5",
name: "Claude Sonnet 4.5 (via Venice)",
reasoning: true,
input: ["text", "image"],
contextWindow: 202752,
maxTokens: 8192,
contextWindow: 198000,
maxTokens: 64000,
privacy: "anonymized",
},
{
id: "claude-sonnet-4-6",
name: "Claude Sonnet 4.6 (via Venice)",
reasoning: true,
input: ["text", "image"],
contextWindow: 1000000,
maxTokens: 64000,
privacy: "anonymized",
},
@@ -229,8 +352,8 @@ export const VENICE_MODEL_CATALOG = [
name: "GPT-5.2 (via Venice)",
reasoning: true,
input: ["text"],
contextWindow: 262144,
maxTokens: 8192,
contextWindow: 256000,
maxTokens: 65536,
privacy: "anonymized",
},
{
@@ -238,8 +361,44 @@ export const VENICE_MODEL_CATALOG = [
name: "GPT-5.2 Codex (via Venice)",
reasoning: true,
input: ["text", "image"],
contextWindow: 262144,
maxTokens: 8192,
contextWindow: 256000,
maxTokens: 65536,
privacy: "anonymized",
},
{
id: "openai-gpt-53-codex",
name: "GPT-5.3 Codex (via Venice)",
reasoning: true,
input: ["text", "image"],
contextWindow: 400000,
maxTokens: 128000,
privacy: "anonymized",
},
{
id: "openai-gpt-54",
name: "GPT-5.4 (via Venice)",
reasoning: true,
input: ["text", "image"],
contextWindow: 1000000,
maxTokens: 131072,
privacy: "anonymized",
},
{
id: "openai-gpt-4o-2024-11-20",
name: "GPT-4o (via Venice)",
reasoning: false,
input: ["text", "image"],
contextWindow: 128000,
maxTokens: 16384,
privacy: "anonymized",
},
{
id: "openai-gpt-4o-mini-2024-07-18",
name: "GPT-4o Mini (via Venice)",
reasoning: false,
input: ["text", "image"],
contextWindow: 128000,
maxTokens: 16384,
privacy: "anonymized",
},
@@ -249,8 +408,17 @@ export const VENICE_MODEL_CATALOG = [
name: "Gemini 3 Pro (via Venice)",
reasoning: true,
input: ["text", "image"],
contextWindow: 202752,
maxTokens: 8192,
contextWindow: 198000,
maxTokens: 32768,
privacy: "anonymized",
},
{
id: "gemini-3-1-pro-preview",
name: "Gemini 3.1 Pro (via Venice)",
reasoning: true,
input: ["text", "image"],
contextWindow: 1000000,
maxTokens: 32768,
privacy: "anonymized",
},
{
@@ -258,8 +426,8 @@ export const VENICE_MODEL_CATALOG = [
name: "Gemini 3 Flash (via Venice)",
reasoning: true,
input: ["text", "image"],
contextWindow: 262144,
maxTokens: 8192,
contextWindow: 256000,
maxTokens: 65536,
privacy: "anonymized",
},
@@ -269,8 +437,8 @@ export const VENICE_MODEL_CATALOG = [
name: "Grok 4.1 Fast (via Venice)",
reasoning: true,
input: ["text", "image"],
contextWindow: 262144,
maxTokens: 8192,
contextWindow: 1000000,
maxTokens: 30000,
privacy: "anonymized",
},
{
@@ -278,28 +446,8 @@ export const VENICE_MODEL_CATALOG = [
name: "Grok Code Fast 1 (via Venice)",
reasoning: true,
input: ["text"],
contextWindow: 262144,
maxTokens: 8192,
privacy: "anonymized",
},
// Other anonymized models
{
id: "kimi-k2-thinking",
name: "Kimi K2 Thinking (via Venice)",
reasoning: true,
input: ["text"],
contextWindow: 262144,
maxTokens: 8192,
privacy: "anonymized",
},
{
id: "minimax-m21",
name: "MiniMax M2.5 (via Venice)",
reasoning: true,
input: ["text"],
contextWindow: 202752,
maxTokens: 8192,
contextWindow: 256000,
maxTokens: 10000,
privacy: "anonymized",
},
] as const;
@@ -326,6 +474,7 @@ export function buildVeniceModelDefinition(entry: VeniceCatalogEntry): ModelDefi
// See: https://github.com/openclaw/openclaw/issues/15819
compat: {
supportsUsageInStreaming: false,
...("supportsTools" in entry && !entry.supportsTools ? { supportsTools: false } : {}),
},
};
}
@@ -334,17 +483,18 @@ export function buildVeniceModelDefinition(entry: VeniceCatalogEntry): ModelDefi
interface VeniceModelSpec {
name: string;
privacy: "private" | "anonymized";
availableContextTokens: number;
capabilities: {
supportsReasoning: boolean;
supportsVision: boolean;
supportsFunctionCalling: boolean;
availableContextTokens?: number;
maxCompletionTokens?: number;
capabilities?: {
supportsReasoning?: boolean;
supportsVision?: boolean;
supportsFunctionCalling?: boolean;
};
}
interface VeniceModel {
id: string;
model_spec: VeniceModelSpec;
model_spec?: VeniceModelSpec;
}
interface VeniceModelsResponse {
@@ -412,6 +562,36 @@ function isRetryableVeniceDiscoveryError(err: unknown): boolean {
return hasRetryableNetworkCode(err);
}
/**
 * Coerce an untrusted numeric field into a positive integer.
 *
 * Accepts only finite numbers; everything else (strings, NaN, Infinity,
 * undefined) yields undefined. The value is floored BEFORE the positivity
 * check so fractional inputs in (0, 1) correctly normalize to undefined
 * instead of leaking a 0 (the original checked `value <= 0` pre-floor, so
 * e.g. 0.5 floored to 0 — a non-positive result from a "positive int"
 * helper; callers only truthy-check the result, so this fix is compatible).
 *
 * @param value Raw value from the Venice API payload.
 * @returns The floored positive integer, or undefined when absent/invalid.
 */
function normalizePositiveInt(value: unknown): number | undefined {
  if (typeof value !== "number" || !Number.isFinite(value)) {
    return undefined;
  }
  const floored = Math.floor(value);
  return floored > 0 ? floored : undefined;
}
/**
 * Derive a bounded maxTokens value from the live /models payload.
 *
 * Returns undefined when the API does not report a usable
 * maxCompletionTokens, letting the caller keep its catalog/default value.
 * Otherwise the reported value is clamped by:
 *  - the API-reported context window when present, else the catalog value
 *    (knownMaxTokens) or the provider-wide default context window; and
 *  - a hard cap: the catalog maxTokens for known models, or the discovery
 *    hard limit for unknown ones — so a bad API response cannot inflate
 *    limits beyond what the catalog vouches for.
 *
 * @param params.apiModel       Row from the Venice /models response.
 * @param params.knownMaxTokens Catalog maxTokens when this is a known model.
 */
function resolveApiMaxCompletionTokens(params: {
  apiModel: VeniceModel;
  knownMaxTokens?: number;
}): number | undefined {
  const spec = params.apiModel.model_spec;

  const reported = normalizePositiveInt(spec?.maxCompletionTokens);
  if (!reported) {
    return undefined;
  }

  const apiContextWindow = normalizePositiveInt(spec?.availableContextTokens);
  const catalogCap =
    typeof params.knownMaxTokens === "number" && Number.isFinite(params.knownMaxTokens)
      ? Math.floor(params.knownMaxTokens)
      : undefined;

  const hardCap = catalogCap ?? VENICE_DISCOVERY_HARD_MAX_TOKENS;
  const contextCap = apiContextWindow ?? catalogCap ?? VENICE_DEFAULT_CONTEXT_WINDOW;

  return Math.min(reported, contextCap, hardCap);
}
/**
 * Read the live API's function-calling capability flag.
 *
 * @param apiModel Row from the Venice /models response.
 * @returns The boolean flag when the API reports one; undefined when the
 *   spec/capabilities are missing or the field is not a boolean, so the
 *   caller can distinguish "unreported" from an explicit value.
 */
function resolveApiSupportsTools(apiModel: VeniceModel): boolean | undefined {
  const flag = apiModel.model_spec?.capabilities?.supportsFunctionCalling;
  if (typeof flag === "boolean") {
    return flag;
  }
  return undefined;
}
/**
* Discover models from Venice API with fallback to static catalog.
* The /models endpoint is public and doesn't require authentication.
@@ -468,30 +648,50 @@ export async function discoverVeniceModels(): Promise<ModelDefinitionConfig[]> {
for (const apiModel of data.data) {
const catalogEntry = catalogById.get(apiModel.id);
const apiMaxTokens = resolveApiMaxCompletionTokens({
apiModel,
knownMaxTokens: catalogEntry?.maxTokens,
});
const apiSupportsTools = resolveApiSupportsTools(apiModel);
if (catalogEntry) {
// Use catalog metadata for known models
models.push(buildVeniceModelDefinition(catalogEntry));
const definition = buildVeniceModelDefinition(catalogEntry);
if (apiMaxTokens !== undefined) {
definition.maxTokens = apiMaxTokens;
}
// We only let live discovery disable tools. Re-enabling tool support still
// requires a catalog update so a transient/bad /models response cannot
// silently expand the tool execution surface for known models.
if (apiSupportsTools === false) {
definition.compat = {
...definition.compat,
supportsTools: false,
};
}
models.push(definition);
} else {
// Create definition for newly discovered models not in catalog
const apiSpec = apiModel.model_spec;
const isReasoning =
apiModel.model_spec.capabilities.supportsReasoning ||
apiSpec?.capabilities?.supportsReasoning ||
apiModel.id.toLowerCase().includes("thinking") ||
apiModel.id.toLowerCase().includes("reason") ||
apiModel.id.toLowerCase().includes("r1");
const hasVision = apiModel.model_spec.capabilities.supportsVision;
const hasVision = apiSpec?.capabilities?.supportsVision === true;
models.push({
id: apiModel.id,
name: apiModel.model_spec.name || apiModel.id,
name: apiSpec?.name || apiModel.id,
reasoning: isReasoning,
input: hasVision ? ["text", "image"] : ["text"],
cost: VENICE_DEFAULT_COST,
contextWindow: apiModel.model_spec.availableContextTokens || 128000,
maxTokens: 8192,
contextWindow:
normalizePositiveInt(apiSpec?.availableContextTokens) ?? VENICE_DEFAULT_CONTEXT_WINDOW,
maxTokens: apiMaxTokens ?? VENICE_DEFAULT_MAX_TOKENS,
// Avoid usage-only streaming chunks that can break OpenAI-compatible parsers.
compat: {
supportsUsageInStreaming: false,
...(apiSupportsTools === false ? { supportsTools: false } : {}),
},
});
}