feat: add NVIDIA API provider integration

Add support for NVIDIA's API (https://integrate.api.nvidia.com/v1) with three models:
- nvidia/llama-3.1-nemotron-70b-instruct (default)
- nvidia/llama-3.3-70b-instruct
- nvidia/mistral-nemo-minitron-8b-8k-instruct

Users can configure via NVIDIA_API_KEY environment variable or auth profiles.

Co-authored-by: thesomewhatyou <162917831+thesomewhatyou@users.noreply.github.com>
This commit is contained in:
anthropic-code-agent[bot]
2026-02-08 01:48:13 +00:00
committed by Peter Steinberger
parent 84ed9ab554
commit c640b5f86c
4 changed files with 203 additions and 0 deletions

View File

@@ -112,6 +112,17 @@ const QIANFAN_DEFAULT_COST = {
cacheWrite: 0,
};
const NVIDIA_BASE_URL = "https://integrate.api.nvidia.com/v1";
const NVIDIA_DEFAULT_MODEL_ID = "nvidia/llama-3.1-nemotron-70b-instruct";
const NVIDIA_DEFAULT_CONTEXT_WINDOW = 131072;
const NVIDIA_DEFAULT_MAX_TOKENS = 4096;
const NVIDIA_DEFAULT_COST = {
input: 0,
output: 0,
cacheRead: 0,
cacheWrite: 0,
};
interface OllamaModel {
name: string;
modified_at: string;
@@ -609,6 +620,42 @@ export function buildQianfanProvider(): ProviderConfig {
};
}
export function buildNvidiaProvider(): ProviderConfig {
return {
baseUrl: NVIDIA_BASE_URL,
api: "openai-completions",
models: [
{
id: NVIDIA_DEFAULT_MODEL_ID,
name: "NVIDIA Llama 3.1 Nemotron 70B Instruct",
reasoning: false,
input: ["text"],
cost: NVIDIA_DEFAULT_COST,
contextWindow: NVIDIA_DEFAULT_CONTEXT_WINDOW,
maxTokens: NVIDIA_DEFAULT_MAX_TOKENS,
},
{
id: "nvidia/llama-3.3-70b-instruct",
name: "NVIDIA Llama 3.3 70B Instruct",
reasoning: false,
input: ["text"],
cost: NVIDIA_DEFAULT_COST,
contextWindow: 131072,
maxTokens: 4096,
},
{
id: "nvidia/mistral-nemo-minitron-8b-8k-instruct",
name: "NVIDIA Mistral NeMo Minitron 8B Instruct",
reasoning: false,
input: ["text"],
cost: NVIDIA_DEFAULT_COST,
contextWindow: 8192,
maxTokens: 2048,
},
],
};
}
export async function resolveImplicitProviders(params: {
agentDir: string;
explicitProviders?: Record<string, ProviderConfig> | null;
@@ -753,6 +800,13 @@ export async function resolveImplicitProviders(params: {
providers.qianfan = { ...buildQianfanProvider(), apiKey: qianfanKey };
}
const nvidiaKey =
resolveEnvApiKeyVarName("nvidia") ??
resolveApiKeyFromProfiles({ provider: "nvidia", store: authStore });
if (nvidiaKey) {
providers.nvidia = { ...buildNvidiaProvider(), apiKey: nvidiaKey };
}
return providers;
}