feat: add NVIDIA API provider integration

Add support for NVIDIA's API (https://integrate.api.nvidia.com/v1) with three models:

- nvidia/llama-3.1-nemotron-70b-instruct (default)
- nvidia/llama-3.3-70b-instruct
- nvidia/mistral-nemo-minitron-8b-8k-instruct

Users can configure the provider via the NVIDIA_API_KEY environment variable or auth profiles.

Co-authored-by: thesomewhatyou <162917831+thesomewhatyou@users.noreply.github.com>
2026-05-09 09:47:40 +00:00 · 2026-02-08 01:48:13 +00:00
parent 84ed9ab554
commit c640b5f86c
4 changed files with 203 additions and 0 deletions
--- a/src/agents/models-config.providers.ts
+++ b/src/agents/models-config.providers.ts
@@ -112,6 +112,17 @@ const QIANFAN_DEFAULT_COST = {
  cacheWrite: 0,
 };

+const NVIDIA_BASE_URL = "https://integrate.api.nvidia.com/v1"; // Used as `baseUrl` of the config returned by buildNvidiaProvider().
+const NVIDIA_DEFAULT_MODEL_ID = "nvidia/llama-3.1-nemotron-70b-instruct"; // Id of the first model entry in buildNvidiaProvider().
+const NVIDIA_DEFAULT_CONTEXT_WINDOW = 131072; // 128 * 1024; presumably measured in tokens — TODO confirm unit.
+const NVIDIA_DEFAULT_MAX_TOKENS = 4096; // `maxTokens` of the default model; presumably an output cap — TODO confirm.
+const NVIDIA_DEFAULT_COST = { // Single cost object shared (by reference) by every NVIDIA model entry.
+  input: 0, // NOTE(review): all-zero costs look like a placeholder for free/unknown pricing — confirm.
+  output: 0,
+  cacheRead: 0,
+  cacheWrite: 0,
+};
+
 interface OllamaModel {
  name: string;
  modified_at: string;
@@ -609,6 +620,42 @@ export function buildQianfanProvider(): ProviderConfig {
  };
 }

+/**
+ * Builds the static provider configuration for NVIDIA's hosted API.
+ *
+ * The provider talks to NVIDIA_BASE_URL using the "openai-completions" API
+ * flavor and exposes three text-only, non-reasoning models. No API key is set
+ * here; the caller attaches one when a credential is available.
+ *
+ * @returns A freshly built provider config. Every model entry references the
+ *   same NVIDIA_DEFAULT_COST object.
+ */
+export function buildNvidiaProvider(): ProviderConfig {
+  return {
+    baseUrl: NVIDIA_BASE_URL,
+    api: "openai-completions",
+    models: [
+      {
+        id: NVIDIA_DEFAULT_MODEL_ID,
+        name: "NVIDIA Llama 3.1 Nemotron 70B Instruct",
+        reasoning: false,
+        input: ["text"],
+        cost: NVIDIA_DEFAULT_COST,
+        contextWindow: NVIDIA_DEFAULT_CONTEXT_WINDOW,
+        maxTokens: NVIDIA_DEFAULT_MAX_TOKENS,
+      },
+      {
+        // NOTE(review): NVIDIA's catalog appears to publish Llama 3.3 70B
+        // Instruct under the "meta/" namespace — confirm this id resolves.
+        id: "nvidia/llama-3.3-70b-instruct",
+        name: "NVIDIA Llama 3.3 70B Instruct",
+        reasoning: false,
+        input: ["text"],
+        cost: NVIDIA_DEFAULT_COST,
+        // Same limits as the default model: reuse the shared constants
+        // instead of repeating the 131072 / 4096 literals.
+        contextWindow: NVIDIA_DEFAULT_CONTEXT_WINDOW,
+        maxTokens: NVIDIA_DEFAULT_MAX_TOKENS,
+      },
+      {
+        // Smaller model with its own limits, so these values stay literal.
+        id: "nvidia/mistral-nemo-minitron-8b-8k-instruct",
+        name: "NVIDIA Mistral NeMo Minitron 8B Instruct",
+        reasoning: false,
+        input: ["text"],
+        cost: NVIDIA_DEFAULT_COST,
+        contextWindow: 8192,
+        maxTokens: 2048,
+      },
+    ],
+  };
+}
+
 export async function resolveImplicitProviders(params: {
  agentDir: string;
  explicitProviders?: Record<string, ProviderConfig> | null;
@@ -753,6 +800,13 @@ export async function resolveImplicitProviders(params: {
    providers.qianfan = { ...buildQianfanProvider(), apiKey: qianfanKey };
  }

+  const nvidiaKey = // NVIDIA credential: environment lookup first, auth-profile store as fallback.
+    resolveEnvApiKeyVarName("nvidia") ?? // NOTE(review): helper name suggests it yields the env var *name*, not the secret — confirm.
+    resolveApiKeyFromProfiles({ provider: "nvidia", store: authStore });
+  if (nvidiaKey) { // Register the provider only when some credential was resolved.
+    providers.nvidia = { ...buildNvidiaProvider(), apiKey: nvidiaKey }; // Static config plus the resolved key.
+  }
+
  return providers;
 }