From 97d1b85ed21a2d0e7d50e748886cd295fa4a4753 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Thu, 16 Oct 2025 03:12:23 +0000 Subject: [PATCH] =?UTF-8?q?chore:=20=E5=90=8C=E6=AD=A5=E6=A8=A1=E5=9E=8B?= =?UTF-8?q?=E4=BB=B7=E6=A0=BC=E6=95=B0=E6=8D=AE=20(3fa853a6cb4e9023c170a9c?= =?UTF-8?q?3fc9ad95dfb2cd6304b623bcedf49e3797af7d8b7)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- model_prices_and_context_window.json | 1809 +++++++++++++++++++----- model_prices_and_context_window.sha256 | 2 +- 2 files changed, 1449 insertions(+), 362 deletions(-) diff --git a/model_prices_and_context_window.json b/model_prices_and_context_window.json index 3987fe7e..bfb7352b 100644 --- a/model_prices_and_context_window.json +++ b/model_prices_and_context_window.json @@ -400,6 +400,44 @@ "supports_response_schema": true, "supports_tool_choice": true }, + "anthropic.claude-haiku-4-5-20251001-v1:0": { + "cache_creation_input_token_cost": 1.25e-06, + "cache_read_input_token_cost": 1e-07, + "input_cost_per_token": 1e-06, + "litellm_provider": "bedrock", + "max_input_tokens": 200000, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 5e-06, + "source": "https://aws.amazon.com/about-aws/whats-new/2025/10/claude-4-5-haiku-anthropic-amazon-bedrock", + "supports_assistant_prefill": true, + "supports_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_tool_choice": true + }, + "anthropic.claude-haiku-4-5@20251001": { + "cache_creation_input_token_cost": 1.25e-06, + "cache_read_input_token_cost": 1e-07, + "input_cost_per_token": 1e-06, + "litellm_provider": "bedrock", + "max_input_tokens": 200000, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 5e-06, + "source": "https://aws.amazon.com/about-aws/whats-new/2025/10/claude-4-5-haiku-anthropic-amazon-bedrock", + "supports_assistant_prefill": true, + "supports_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_tool_choice": true + }, "anthropic.claude-3-5-sonnet-20240620-v1:0": { "input_cost_per_token": 3e-06, "litellm_provider": "bedrock", @@ -810,6 +848,25 @@ "supports_tool_choice": true, "supports_vision": true }, + "apac.anthropic.claude-haiku-4-5-20251001-v1:0": { + "cache_creation_input_token_cost": 1.25e-06, + "cache_read_input_token_cost": 1e-07, + "input_cost_per_token": 1e-06, + "litellm_provider": "bedrock", + "max_input_tokens": 200000, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 5e-06, + "source": "https://aws.amazon.com/about-aws/whats-new/2025/10/claude-4-5-haiku-anthropic-amazon-bedrock", + "supports_assistant_prefill": true, + "supports_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_tool_choice": true + }, "apac.anthropic.claude-3-sonnet-20240229-v1:0": { "input_cost_per_token": 3e-06, "litellm_provider": "bedrock", @@ -866,6 +923,36 @@ "mode": "audio_transcription", "output_cost_per_second": 0.0 }, + "au.anthropic.claude-sonnet-4-5-20250929-v1:0": { + "cache_creation_input_token_cost": 4.125e-06, + "cache_read_input_token_cost": 3.3e-07, + "input_cost_per_token": 3.3e-06, + "input_cost_per_token_above_200k_tokens": 6.6e-06, + "output_cost_per_token_above_200k_tokens": 2.475e-05, + "cache_creation_input_token_cost_above_200k_tokens": 8.25e-06, + "cache_read_input_token_cost_above_200k_tokens": 6.6e-07, + "litellm_provider": "bedrock_converse", + "max_input_tokens": 200000, + "max_output_tokens": 64000, + "max_tokens": 200000, + "mode": "chat", + "output_cost_per_token": 1.65e-05, + "search_context_cost_per_query": { + "search_context_size_high": 0.01, + "search_context_size_low": 0.01, + "search_context_size_medium": 0.01 + }, + "supports_assistant_prefill": true, + "supports_computer_use": true, + "supports_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true, + "tool_use_system_prompt_tokens": 346 + }, "azure/ada": { "input_cost_per_token": 1e-07, "litellm_provider": "azure", @@ -2004,9 +2091,9 @@ "cache_read_input_token_cost": 1.25e-07, "input_cost_per_token": 1.25e-06, "litellm_provider": "azure", - "max_input_tokens": 272000, - "max_output_tokens": 128000, - "max_tokens": 128000, + "max_input_tokens": 128000, + "max_output_tokens": 16384, + "max_tokens": 16384, "mode": "chat", "output_cost_per_token": 1e-05, "supported_endpoints": [ @@ -3164,6 +3251,42 @@ "supports_function_calling": true, "supports_vision": true }, + "azure_ai/Phi-4-mini-reasoning": { + "input_cost_per_token": 8e-08, + "litellm_provider": "azure_ai", + "max_input_tokens": 131072, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "chat", + "output_cost_per_token": 3.2e-07, + "source": "https://azure.microsoft.com/en-us/pricing/details/ai-foundry-models/microsoft/", + "supports_function_calling": true + }, + "azure_ai/Phi-4-reasoning": { + "input_cost_per_token": 1.25e-07, + "litellm_provider": "azure_ai", + "max_input_tokens": 32768, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "chat", + "output_cost_per_token": 5e-07, + "source": "https://azure.microsoft.com/en-us/pricing/details/ai-foundry-models/microsoft/", + "supports_function_calling": true, + "supports_tool_choice": true, + "supports_reasoning": true + }, + "azure_ai/MAI-DS-R1": { + "input_cost_per_token": 1.35e-06, + "litellm_provider": "azure_ai", + "max_input_tokens": 128000, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 5.4e-06, + "source": "https://azure.microsoft.com/en-us/pricing/details/ai-foundry-models/microsoft/", + "supports_reasoning": true, + "supports_tool_choice": true + }, "azure_ai/cohere-rerank-v3-english": { "input_cost_per_query": 0.002, "input_cost_per_token": 0.0, @@ -3308,6 +3431,63 @@ "supports_tool_choice": true, "supports_web_search": true }, + "azure_ai/grok-4": { + "input_cost_per_token": 5.5e-06, + "litellm_provider": "azure_ai", + "max_input_tokens": 131072, + "max_output_tokens": 131072, + "max_tokens": 131072, + "mode": "chat", + "output_cost_per_token": 2.75e-05, + "source": "https://azure.microsoft.com/en-us/blog/grok-4-is-now-available-in-azure-ai-foundry-unlock-frontier-intelligence-and-business-ready-capabilities/", + "supports_function_calling": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_web_search": true + }, + "azure_ai/grok-4-fast-non-reasoning": { + "input_cost_per_token": 0.43e-06, + "output_cost_per_token": 1.73e-06, + "litellm_provider": "azure_ai", + "max_input_tokens": 131072, + "max_output_tokens": 131072, + "max_tokens": 131072, + "mode": "chat", + "supports_function_calling": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_web_search": true + }, + "azure_ai/grok-4-fast-reasoning": { + "input_cost_per_token": 0.43e-06, + "output_cost_per_token": 1.73e-06, + "litellm_provider": "azure_ai", + "max_input_tokens": 131072, + "max_output_tokens": 131072, + "max_tokens": 131072, + "mode": "chat", + "source": "https://techcommunity.microsoft.com/blog/azure-ai-foundry-blog/announcing-the-grok-4-fast-models-from-xai-now-available-in-azure-ai-foundry/4456701", + "supports_function_calling": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_web_search": true + }, + "azure_ai/grok-code-fast-1": { + "input_cost_per_token": 3.5e-06, + "litellm_provider": "azure_ai", + "max_input_tokens": 131072, + "max_output_tokens": 131072, + "max_tokens": 131072, + "mode": "chat", + "output_cost_per_token": 1.75e-05, + "source": "https://azure.microsoft.com/en-us/blog/grok-4-is-now-available-in-azure-ai-foundry-unlock-frontier-intelligence-and-business-ready-capabilities/", + "supports_function_calling": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_web_search": true + }, "azure_ai/jais-30b-chat": { "input_cost_per_token": 0.0032, "litellm_provider": "azure_ai", @@ -4489,6 +4669,48 @@ "supports_web_search": true, "tool_use_system_prompt_tokens": 264 }, + "claude-haiku-4-5-20251001": { + "cache_creation_input_token_cost": 1.25e-06, + "cache_creation_input_token_cost_above_1hr": 2e-06, + "cache_read_input_token_cost": 1e-07, + "input_cost_per_token": 1e-06, + "litellm_provider": "anthropic", + "max_input_tokens": 200000, + "max_output_tokens": 64000, + "max_tokens": 64000, + "mode": "chat", + "output_cost_per_token": 5e-06, + "supports_assistant_prefill": true, + "supports_function_calling": true, + "supports_computer_use": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true + }, + "claude-haiku-4-5": { + "cache_creation_input_token_cost": 1.25e-06, + "cache_creation_input_token_cost_above_1hr": 2e-06, + "cache_read_input_token_cost": 1e-07, + "input_cost_per_token": 1e-06, + "litellm_provider": "anthropic", + "max_input_tokens": 200000, + "max_output_tokens": 64000, + "max_tokens": 64000, + "mode": "chat", + "output_cost_per_token": 5e-06, + "supports_assistant_prefill": true, + "supports_function_calling": true, + "supports_computer_use": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true + }, "claude-3-5-sonnet-20240620": { "cache_creation_input_token_cost": 3.75e-06, "cache_creation_input_token_cost_above_1hr": 6e-06, @@ -4718,7 +4940,7 @@ "input_cost_per_token_above_200k_tokens": 6e-06, "litellm_provider": "anthropic", "max_input_tokens": 1000000, - "max_output_tokens": 1000000, + "max_output_tokens": 64000, "max_tokens": 1000000, "mode": "chat", "output_cost_per_token": 1.5e-05, @@ -4739,10 +4961,14 @@ "supports_vision": true, "tool_use_system_prompt_tokens": 159 }, - "anthropic/claude-sonnet-4-5": { + "claude-sonnet-4-5": { "cache_creation_input_token_cost": 3.75e-06, "cache_read_input_token_cost": 3e-07, "input_cost_per_token": 3e-06, + "input_cost_per_token_above_200k_tokens": 6e-06, + "output_cost_per_token_above_200k_tokens": 2.25e-05, + "cache_creation_input_token_cost_above_200k_tokens": 7.5e-06, + "cache_read_input_token_cost_above_200k_tokens": 6e-07, "litellm_provider": "anthropic", "max_input_tokens": 200000, "max_output_tokens": 64000, @@ -4769,6 +4995,10 @@ "cache_creation_input_token_cost": 3.75e-06, "cache_read_input_token_cost": 3e-07, "input_cost_per_token": 3e-06, + "input_cost_per_token_above_200k_tokens": 6e-06, + "output_cost_per_token_above_200k_tokens": 2.25e-05, + "cache_creation_input_token_cost_above_200k_tokens": 7.5e-06, + "cache_read_input_token_cost_above_200k_tokens": 6e-07, "litellm_provider": "anthropic", "max_input_tokens": 200000, "max_output_tokens": 64000, @@ -5229,6 +5459,16 @@ "output_cost_per_token": 0.0, "supports_embedding_image_input": true }, + "cohere.embed-v4:0": { + "input_cost_per_token": 1.2e-07, + "litellm_provider": "bedrock", + "max_input_tokens": 128000, + "max_tokens": 128000, + "mode": "embedding", + "output_cost_per_token": 0.0, + "output_vector_size": 1536, + "supports_embedding_image_input": true + }, "cohere.rerank-v3-5:0": { "input_cost_per_query": 0.002, "input_cost_per_token": 0.0, @@ -6627,629 +6867,679 @@ ] }, "deepinfra/Gryphe/MythoMax-L2-13b": { - "input_cost_per_token": 7.2e-08, - "litellm_provider": "deepinfra", + "max_tokens": 4096, "max_input_tokens": 4096, "max_output_tokens": 4096, - "max_tokens": 4096, + "input_cost_per_token": 8e-08, + "output_cost_per_token": 9e-08, + "litellm_provider": "deepinfra", "mode": "chat", - "output_cost_per_token": 7.2e-08, "supports_tool_choice": true }, "deepinfra/NousResearch/Hermes-3-Llama-3.1-405B": { - "input_cost_per_token": 7e-07, - "litellm_provider": "deepinfra", + "max_tokens": 131072, "max_input_tokens": 131072, "max_output_tokens": 131072, - "max_tokens": 131072, + "input_cost_per_token": 1e-06, + "output_cost_per_token": 1e-06, + "litellm_provider": "deepinfra", "mode": "chat", - "output_cost_per_token": 8e-07, "supports_tool_choice": true }, "deepinfra/NousResearch/Hermes-3-Llama-3.1-70B": { - "input_cost_per_token": 1e-07, - "litellm_provider": "deepinfra", + "max_tokens": 131072, "max_input_tokens": 131072, "max_output_tokens": 131072, - "max_tokens": 131072, + "input_cost_per_token": 3e-07, + "output_cost_per_token": 3e-07, + "litellm_provider": "deepinfra", "mode": "chat", - "output_cost_per_token": 2.8e-07, "supports_tool_choice": false }, "deepinfra/Qwen/QwQ-32B": { - "input_cost_per_token": 1.5e-07, - "litellm_provider": "deepinfra", + "max_tokens": 131072, "max_input_tokens": 131072, "max_output_tokens": 131072, - "max_tokens": 131072, - "mode": "chat", + "input_cost_per_token": 1.5e-07, "output_cost_per_token": 4e-07, + "litellm_provider": "deepinfra", + "mode": "chat", "supports_tool_choice": true }, "deepinfra/Qwen/Qwen2.5-72B-Instruct": { - "input_cost_per_token": 1.2e-07, - "litellm_provider": "deepinfra", + "max_tokens": 32768, "max_input_tokens": 32768, "max_output_tokens": 32768, - "max_tokens": 32768, - "mode": "chat", + "input_cost_per_token": 1.2e-07, "output_cost_per_token": 3.9e-07, + "litellm_provider": "deepinfra", + "mode": "chat", "supports_tool_choice": true }, "deepinfra/Qwen/Qwen2.5-7B-Instruct": { - "input_cost_per_token": 4e-08, - "litellm_provider": "deepinfra", + "max_tokens": 32768, "max_input_tokens": 32768, "max_output_tokens": 32768, - "max_tokens": 32768, - "mode": "chat", + "input_cost_per_token": 4e-08, "output_cost_per_token": 1e-07, + "litellm_provider": "deepinfra", + "mode": "chat", "supports_tool_choice": false }, "deepinfra/Qwen/Qwen2.5-VL-32B-Instruct": { - "input_cost_per_token": 2e-07, - "litellm_provider": "deepinfra", + "max_tokens": 128000, "max_input_tokens": 128000, "max_output_tokens": 128000, - "max_tokens": 128000, - "mode": "chat", + "input_cost_per_token": 2e-07, "output_cost_per_token": 6e-07, + "litellm_provider": "deepinfra", + "mode": "chat", "supports_tool_choice": true }, "deepinfra/Qwen/Qwen3-14B": { - "input_cost_per_token": 6e-08, - "litellm_provider": "deepinfra", + "max_tokens": 40960, "max_input_tokens": 40960, "max_output_tokens": 40960, - "max_tokens": 40960, - "mode": "chat", + "input_cost_per_token": 6e-08, "output_cost_per_token": 2.4e-07, + "litellm_provider": "deepinfra", + "mode": "chat", "supports_tool_choice": true }, "deepinfra/Qwen/Qwen3-235B-A22B": { - "input_cost_per_token": 1.3e-07, - "litellm_provider": "deepinfra", + "max_tokens": 40960, "max_input_tokens": 40960, "max_output_tokens": 40960, - "max_tokens": 40960, + "input_cost_per_token": 1.8e-07, + "output_cost_per_token": 5.4e-07, + "litellm_provider": "deepinfra", "mode": "chat", - "output_cost_per_token": 6e-07, "supports_tool_choice": true }, "deepinfra/Qwen/Qwen3-235B-A22B-Instruct-2507": { - "input_cost_per_token": 1.3e-07, - "litellm_provider": "deepinfra", + "max_tokens": 262144, "max_input_tokens": 262144, "max_output_tokens": 262144, - "max_tokens": 262144, - "mode": "chat", + "input_cost_per_token": 9e-08, "output_cost_per_token": 6e-07, + "litellm_provider": "deepinfra", + "mode": "chat", "supports_tool_choice": true }, "deepinfra/Qwen/Qwen3-235B-A22B-Thinking-2507": { - "input_cost_per_token": 1.3e-07, - "litellm_provider": "deepinfra", + "max_tokens": 262144, "max_input_tokens": 262144, "max_output_tokens": 262144, - "max_tokens": 262144, + "input_cost_per_token": 3e-07, + "output_cost_per_token": 2.9e-06, + "litellm_provider": "deepinfra", "mode": "chat", - "output_cost_per_token": 6e-07, "supports_tool_choice": true }, "deepinfra/Qwen/Qwen3-30B-A3B": { - "input_cost_per_token": 8e-08, - "litellm_provider": "deepinfra", + "max_tokens": 40960, "max_input_tokens": 40960, "max_output_tokens": 40960, - "max_tokens": 40960, - "mode": "chat", + "input_cost_per_token": 8e-08, "output_cost_per_token": 2.9e-07, + "litellm_provider": "deepinfra", + "mode": "chat", "supports_tool_choice": true }, "deepinfra/Qwen/Qwen3-32B": { - "input_cost_per_token": 1e-07, - "litellm_provider": "deepinfra", + "max_tokens": 40960, "max_input_tokens": 40960, "max_output_tokens": 40960, - "max_tokens": 40960, + "input_cost_per_token": 1e-07, + "output_cost_per_token": 2.8e-07, + "litellm_provider": "deepinfra", "mode": "chat", - "output_cost_per_token": 3e-07, "supports_tool_choice": true }, "deepinfra/Qwen/Qwen3-Coder-480B-A35B-Instruct": { - "input_cost_per_token": 4e-07, - "litellm_provider": "deepinfra", + "max_tokens": 262144, "max_input_tokens": 262144, "max_output_tokens": 262144, - "max_tokens": 262144, - "mode": "chat", + "input_cost_per_token": 4e-07, "output_cost_per_token": 1.6e-06, + "litellm_provider": "deepinfra", + "mode": "chat", "supports_tool_choice": true }, "deepinfra/Qwen/Qwen3-Coder-480B-A35B-Instruct-Turbo": { - "cache_read_input_token_cost": 2.4e-07, - "input_cost_per_token": 3e-07, - "litellm_provider": "deepinfra", + "max_tokens": 262144, "max_input_tokens": 262144, "max_output_tokens": 262144, - "max_tokens": 262144, - "mode": "chat", + "input_cost_per_token": 2.9e-07, "output_cost_per_token": 1.2e-06, + "litellm_provider": "deepinfra", + "mode": "chat", + "supports_tool_choice": true + }, + "deepinfra/Qwen/Qwen3-Next-80B-A3B-Instruct": { + "max_tokens": 262144, + "max_input_tokens": 262144, + "max_output_tokens": 262144, + "input_cost_per_token": 1.4e-07, + "output_cost_per_token": 1.4e-06, + "litellm_provider": "deepinfra", + "mode": "chat", + "supports_tool_choice": true + }, + "deepinfra/Qwen/Qwen3-Next-80B-A3B-Thinking": { + "max_tokens": 262144, + "max_input_tokens": 262144, + "max_output_tokens": 262144, + "input_cost_per_token": 1.4e-07, + "output_cost_per_token": 1.4e-06, + "litellm_provider": "deepinfra", + "mode": "chat", "supports_tool_choice": true }, "deepinfra/Sao10K/L3-8B-Lunaris-v1-Turbo": { - "input_cost_per_token": 2e-08, - "litellm_provider": "deepinfra", + "max_tokens": 8192, "max_input_tokens": 8192, "max_output_tokens": 8192, - "max_tokens": 8192, - "mode": "chat", + "input_cost_per_token": 4e-08, "output_cost_per_token": 5e-08, + "litellm_provider": "deepinfra", + "mode": "chat", "supports_tool_choice": false }, "deepinfra/Sao10K/L3.1-70B-Euryale-v2.2": { - "input_cost_per_token": 6.5e-07, - "litellm_provider": "deepinfra", + "max_tokens": 131072, "max_input_tokens": 131072, "max_output_tokens": 131072, - "max_tokens": 131072, - "mode": "chat", + "input_cost_per_token": 6.5e-07, "output_cost_per_token": 7.5e-07, + "litellm_provider": "deepinfra", + "mode": "chat", "supports_tool_choice": false }, "deepinfra/Sao10K/L3.3-70B-Euryale-v2.3": { - "input_cost_per_token": 6.5e-07, - "litellm_provider": "deepinfra", + "max_tokens": 131072, "max_input_tokens": 131072, "max_output_tokens": 131072, - "max_tokens": 131072, - "mode": "chat", + "input_cost_per_token": 6.5e-07, "output_cost_per_token": 7.5e-07, + "litellm_provider": "deepinfra", + "mode": "chat", "supports_tool_choice": false }, "deepinfra/allenai/olmOCR-7B-0725-FP8": { - "input_cost_per_token": 2.7e-07, - "litellm_provider": "deepinfra", + "max_tokens": 16384, "max_input_tokens": 16384, "max_output_tokens": 16384, - "max_tokens": 16384, - "mode": "chat", + "input_cost_per_token": 2.7e-07, "output_cost_per_token": 1.5e-06, + "litellm_provider": "deepinfra", + "mode": "chat", "supports_tool_choice": false }, "deepinfra/anthropic/claude-3-7-sonnet-latest": { - "cache_read_input_token_cost": 3.3e-07, - "input_cost_per_token": 3.3e-06, - "litellm_provider": "deepinfra", + "max_tokens": 200000, "max_input_tokens": 200000, "max_output_tokens": 200000, - "max_tokens": 200000, - "mode": "chat", + "input_cost_per_token": 3.3e-06, "output_cost_per_token": 1.65e-05, + "cache_read_input_token_cost": 3.3e-07, + "litellm_provider": "deepinfra", + "mode": "chat", "supports_tool_choice": true }, "deepinfra/anthropic/claude-4-opus": { - "input_cost_per_token": 1.65e-05, - "litellm_provider": "deepinfra", + "max_tokens": 200000, "max_input_tokens": 200000, "max_output_tokens": 200000, - "max_tokens": 200000, - "mode": "chat", + "input_cost_per_token": 1.65e-05, "output_cost_per_token": 8.25e-05, + "litellm_provider": "deepinfra", + "mode": "chat", "supports_tool_choice": true }, "deepinfra/anthropic/claude-4-sonnet": { - "input_cost_per_token": 3.3e-06, - "litellm_provider": "deepinfra", + "max_tokens": 200000, "max_input_tokens": 200000, "max_output_tokens": 200000, - "max_tokens": 200000, - "mode": "chat", + "input_cost_per_token": 3.3e-06, "output_cost_per_token": 1.65e-05, + "litellm_provider": "deepinfra", + "mode": "chat", "supports_tool_choice": true }, "deepinfra/deepseek-ai/DeepSeek-R1": { - "input_cost_per_token": 7e-07, - "litellm_provider": "deepinfra", + "max_tokens": 163840, "max_input_tokens": 163840, "max_output_tokens": 163840, - "max_tokens": 163840, - "mode": "chat", + "input_cost_per_token": 7e-07, "output_cost_per_token": 2.4e-06, + "litellm_provider": "deepinfra", + "mode": "chat", "supports_tool_choice": true }, "deepinfra/deepseek-ai/DeepSeek-R1-0528": { - "cache_read_input_token_cost": 4e-07, - "input_cost_per_token": 5e-07, - "litellm_provider": "deepinfra", + "max_tokens": 163840, "max_input_tokens": 163840, "max_output_tokens": 163840, - "max_tokens": 163840, - "mode": "chat", + "input_cost_per_token": 5e-07, "output_cost_per_token": 2.15e-06, + "cache_read_input_token_cost": 4e-07, + "litellm_provider": "deepinfra", + "mode": "chat", "supports_tool_choice": true }, "deepinfra/deepseek-ai/DeepSeek-R1-0528-Turbo": { - "input_cost_per_token": 1e-06, - "litellm_provider": "deepinfra", + "max_tokens": 32768, "max_input_tokens": 32768, "max_output_tokens": 32768, - "max_tokens": 32768, - "mode": "chat", + "input_cost_per_token": 1e-06, "output_cost_per_token": 3e-06, + "litellm_provider": "deepinfra", + "mode": "chat", "supports_tool_choice": true }, "deepinfra/deepseek-ai/DeepSeek-R1-Distill-Llama-70B": { - "input_cost_per_token": 1e-07, - "litellm_provider": "deepinfra", + "max_tokens": 131072, "max_input_tokens": 131072, "max_output_tokens": 131072, - "max_tokens": 131072, + "input_cost_per_token": 2e-07, + "output_cost_per_token": 6e-07, + "litellm_provider": "deepinfra", "mode": "chat", - "output_cost_per_token": 4e-07, "supports_tool_choice": false }, "deepinfra/deepseek-ai/DeepSeek-R1-Distill-Qwen-32B": { - "input_cost_per_token": 7.5e-08, - "litellm_provider": "deepinfra", + "max_tokens": 131072, "max_input_tokens": 131072, "max_output_tokens": 131072, - "max_tokens": 131072, + "input_cost_per_token": 2.7e-07, + "output_cost_per_token": 2.7e-07, + "litellm_provider": "deepinfra", "mode": "chat", - "output_cost_per_token": 1.5e-07, "supports_tool_choice": true }, "deepinfra/deepseek-ai/DeepSeek-R1-Turbo": { - "input_cost_per_token": 1e-06, - "litellm_provider": "deepinfra", + "max_tokens": 40960, "max_input_tokens": 40960, "max_output_tokens": 40960, - "max_tokens": 40960, - "mode": "chat", + "input_cost_per_token": 1e-06, "output_cost_per_token": 3e-06, + "litellm_provider": "deepinfra", + "mode": "chat", "supports_tool_choice": true }, "deepinfra/deepseek-ai/DeepSeek-V3": { - "input_cost_per_token": 3.8e-07, - "litellm_provider": "deepinfra", + "max_tokens": 163840, "max_input_tokens": 163840, "max_output_tokens": 163840, - "max_tokens": 163840, - "mode": "chat", + "input_cost_per_token": 3.8e-07, "output_cost_per_token": 8.9e-07, + "litellm_provider": "deepinfra", + "mode": "chat", "supports_tool_choice": true }, "deepinfra/deepseek-ai/DeepSeek-V3-0324": { - "cache_read_input_token_cost": 2.24e-07, - "input_cost_per_token": 2.8e-07, - "litellm_provider": "deepinfra", + "max_tokens": 163840, "max_input_tokens": 163840, "max_output_tokens": 163840, - "max_tokens": 163840, - "mode": "chat", + "input_cost_per_token": 2.5e-07, "output_cost_per_token": 8.8e-07, + "litellm_provider": "deepinfra", + "mode": "chat", "supports_tool_choice": true }, "deepinfra/deepseek-ai/DeepSeek-V3.1": { - "cache_read_input_token_cost": 2.16e-07, - "input_cost_per_token": 2.7e-07, - "litellm_provider": "deepinfra", + "max_tokens": 163840, "max_input_tokens": 163840, "max_output_tokens": 163840, - "max_tokens": 163840, - "mode": "chat", + "input_cost_per_token": 2.7e-07, "output_cost_per_token": 1e-06, - "supports_reasoning": true, + "cache_read_input_token_cost": 2.16e-07, + "litellm_provider": "deepinfra", + "mode": "chat", + "supports_tool_choice": true, + "supports_reasoning": true + }, + "deepinfra/deepseek-ai/DeepSeek-V3.1-Terminus": { + "max_tokens": 163840, + "max_input_tokens": 163840, + "max_output_tokens": 163840, + "input_cost_per_token": 2.7e-07, + "output_cost_per_token": 1e-06, + "cache_read_input_token_cost": 2.16e-07, + "litellm_provider": "deepinfra", + "mode": "chat", "supports_tool_choice": true }, "deepinfra/google/gemini-2.0-flash-001": { - "input_cost_per_token": 1e-07, - "litellm_provider": "deepinfra", + "max_tokens": 1000000, "max_input_tokens": 1000000, "max_output_tokens": 1000000, - "max_tokens": 1000000, - "mode": "chat", + "input_cost_per_token": 1e-07, "output_cost_per_token": 4e-07, + "litellm_provider": "deepinfra", + "mode": "chat", "supports_tool_choice": true }, "deepinfra/google/gemini-2.5-flash": { - "input_cost_per_token": 2.1e-07, - "litellm_provider": "deepinfra", + "max_tokens": 1000000, "max_input_tokens": 1000000, "max_output_tokens": 1000000, - "max_tokens": 1000000, + "input_cost_per_token": 3e-07, + "output_cost_per_token": 2.5e-06, + "litellm_provider": "deepinfra", "mode": "chat", - "output_cost_per_token": 1.75e-06, "supports_tool_choice": true }, "deepinfra/google/gemini-2.5-pro": { - "input_cost_per_token": 8.75e-07, - "litellm_provider": "deepinfra", + "max_tokens": 1000000, "max_input_tokens": 1000000, "max_output_tokens": 1000000, - "max_tokens": 1000000, + "input_cost_per_token": 1.25e-06, + "output_cost_per_token": 1e-05, + "litellm_provider": "deepinfra", "mode": "chat", - "output_cost_per_token": 7e-06, "supports_tool_choice": true }, "deepinfra/google/gemma-3-12b-it": { - "input_cost_per_token": 5e-08, - "litellm_provider": "deepinfra", + "max_tokens": 131072, "max_input_tokens": 131072, "max_output_tokens": 131072, - "max_tokens": 131072, - "mode": "chat", + "input_cost_per_token": 5e-08, "output_cost_per_token": 1e-07, + "litellm_provider": "deepinfra", + "mode": "chat", "supports_tool_choice": true }, "deepinfra/google/gemma-3-27b-it": { - "input_cost_per_token": 9e-08, - "litellm_provider": "deepinfra", + "max_tokens": 131072, "max_input_tokens": 131072, "max_output_tokens": 131072, - "max_tokens": 131072, + "input_cost_per_token": 9e-08, + "output_cost_per_token": 1.6e-07, + "litellm_provider": "deepinfra", "mode": "chat", - "output_cost_per_token": 1.7e-07, "supports_tool_choice": true }, "deepinfra/google/gemma-3-4b-it": { - "input_cost_per_token": 4e-08, - "litellm_provider": "deepinfra", + "max_tokens": 131072, "max_input_tokens": 131072, "max_output_tokens": 131072, - "max_tokens": 131072, - "mode": "chat", + "input_cost_per_token": 4e-08, "output_cost_per_token": 8e-08, + "litellm_provider": "deepinfra", + "mode": "chat", "supports_tool_choice": true }, "deepinfra/meta-llama/Llama-3.2-11B-Vision-Instruct": { - "input_cost_per_token": 4.9e-08, - "litellm_provider": "deepinfra", + "max_tokens": 131072, "max_input_tokens": 131072, "max_output_tokens": 131072, - "max_tokens": 131072, - "mode": "chat", + "input_cost_per_token": 4.9e-08, "output_cost_per_token": 4.9e-08, + "litellm_provider": "deepinfra", + "mode": "chat", "supports_tool_choice": false }, "deepinfra/meta-llama/Llama-3.2-3B-Instruct": { - "input_cost_per_token": 1.2e-08, - "litellm_provider": "deepinfra", + "max_tokens": 131072, "max_input_tokens": 131072, "max_output_tokens": 131072, - "max_tokens": 131072, + "input_cost_per_token": 2e-08, + "output_cost_per_token": 2e-08, + "litellm_provider": "deepinfra", "mode": "chat", - "output_cost_per_token": 2.4e-08, "supports_tool_choice": true }, "deepinfra/meta-llama/Llama-3.3-70B-Instruct": { - "input_cost_per_token": 2.3e-07, - "litellm_provider": "deepinfra", + "max_tokens": 131072, "max_input_tokens": 131072, "max_output_tokens": 131072, - "max_tokens": 131072, - "mode": "chat", + "input_cost_per_token": 2.3e-07, "output_cost_per_token": 4e-07, + "litellm_provider": "deepinfra", + "mode": "chat", "supports_tool_choice": true }, "deepinfra/meta-llama/Llama-3.3-70B-Instruct-Turbo": { - "input_cost_per_token": 3.8e-08, - "litellm_provider": "deepinfra", + "max_tokens": 131072, "max_input_tokens": 131072, "max_output_tokens": 131072, - "max_tokens": 131072, + "input_cost_per_token": 1.3e-07, + "output_cost_per_token": 3.9e-07, + "litellm_provider": "deepinfra", "mode": "chat", - "output_cost_per_token": 1.2e-07, "supports_tool_choice": true }, "deepinfra/meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8": { - "input_cost_per_token": 1.5e-07, - "litellm_provider": "deepinfra", + "max_tokens": 1048576, "max_input_tokens": 1048576, "max_output_tokens": 1048576, - "max_tokens": 1048576, - "mode": "chat", + "input_cost_per_token": 1.5e-07, "output_cost_per_token": 6e-07, + "litellm_provider": "deepinfra", + "mode": "chat", "supports_tool_choice": true }, "deepinfra/meta-llama/Llama-4-Scout-17B-16E-Instruct": { - "input_cost_per_token": 8e-08, - "litellm_provider": "deepinfra", + "max_tokens": 327680, "max_input_tokens": 327680, "max_output_tokens": 327680, - "max_tokens": 327680, - "mode": "chat", + "input_cost_per_token": 8e-08, "output_cost_per_token": 3e-07, + "litellm_provider": "deepinfra", + "mode": "chat", "supports_tool_choice": true }, "deepinfra/meta-llama/Llama-Guard-3-8B": { - "input_cost_per_token": 5.5e-08, - "litellm_provider": "deepinfra", + "max_tokens": 131072, "max_input_tokens": 131072, "max_output_tokens": 131072, - "max_tokens": 131072, - "mode": "chat", + "input_cost_per_token": 5.5e-08, "output_cost_per_token": 5.5e-08, + "litellm_provider": "deepinfra", + "mode": "chat", "supports_tool_choice": false }, "deepinfra/meta-llama/Llama-Guard-4-12B": { - "input_cost_per_token": 1.8e-07, - "litellm_provider": "deepinfra", + "max_tokens": 163840, "max_input_tokens": 163840, "max_output_tokens": 163840, - "max_tokens": 163840, - "mode": "chat", + "input_cost_per_token": 1.8e-07, "output_cost_per_token": 1.8e-07, + "litellm_provider": "deepinfra", + "mode": "chat", "supports_tool_choice": false }, "deepinfra/meta-llama/Meta-Llama-3-8B-Instruct": { - "input_cost_per_token": 3e-08, - "litellm_provider": "deepinfra", + "max_tokens": 8192, "max_input_tokens": 8192, "max_output_tokens": 8192, - "max_tokens": 8192, - "mode": "chat", + "input_cost_per_token": 3e-08, "output_cost_per_token": 6e-08, + "litellm_provider": "deepinfra", + "mode": "chat", "supports_tool_choice": true }, "deepinfra/meta-llama/Meta-Llama-3.1-70B-Instruct": { - "input_cost_per_token": 2.3e-07, - "litellm_provider": "deepinfra", + "max_tokens": 131072, "max_input_tokens": 131072, "max_output_tokens": 131072, - "max_tokens": 131072, - "mode": "chat", + "input_cost_per_token": 4e-07, "output_cost_per_token": 4e-07, + "litellm_provider": "deepinfra", + "mode": "chat", "supports_tool_choice": true }, "deepinfra/meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo": { - "input_cost_per_token": 1e-07, - "litellm_provider": "deepinfra", + "max_tokens": 131072, "max_input_tokens": 131072, "max_output_tokens": 131072, - "max_tokens": 131072, - "mode": "chat", + "input_cost_per_token": 1e-07, "output_cost_per_token": 2.8e-07, + "litellm_provider": "deepinfra", + "mode": "chat", "supports_tool_choice": true }, "deepinfra/meta-llama/Meta-Llama-3.1-8B-Instruct": { - "input_cost_per_token": 3e-08, - "litellm_provider": "deepinfra", + "max_tokens": 131072, "max_input_tokens": 131072, "max_output_tokens": 131072, - "max_tokens": 131072, - "mode": "chat", + "input_cost_per_token": 3e-08, "output_cost_per_token": 5e-08, + "litellm_provider": "deepinfra", + "mode": "chat", "supports_tool_choice": true }, "deepinfra/meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo": { - "input_cost_per_token": 1.5e-08, - "litellm_provider": "deepinfra", + "max_tokens": 131072, "max_input_tokens": 131072, "max_output_tokens": 131072, - "max_tokens": 131072, + "input_cost_per_token": 2e-08, + "output_cost_per_token": 3e-08, + "litellm_provider": "deepinfra", "mode": "chat", - "output_cost_per_token": 2e-08, "supports_tool_choice": true }, "deepinfra/microsoft/WizardLM-2-8x22B": { - "input_cost_per_token": 4.8e-07, - "litellm_provider": "deepinfra", + "max_tokens": 65536, "max_input_tokens": 65536, "max_output_tokens": 65536, - "max_tokens": 65536, - "mode": "chat", + "input_cost_per_token": 4.8e-07, "output_cost_per_token": 4.8e-07, + "litellm_provider": "deepinfra", + "mode": "chat", "supports_tool_choice": false }, "deepinfra/microsoft/phi-4": { - "input_cost_per_token": 7e-08, - "litellm_provider": "deepinfra", + "max_tokens": 16384, "max_input_tokens": 16384, "max_output_tokens": 16384, - "max_tokens": 16384, - "mode": "chat", + "input_cost_per_token": 7e-08, "output_cost_per_token": 1.4e-07, + "litellm_provider": "deepinfra", + "mode": "chat", "supports_tool_choice": true }, "deepinfra/mistralai/Mistral-Nemo-Instruct-2407": { - "input_cost_per_token": 2e-08, - "litellm_provider": "deepinfra", + "max_tokens": 131072, "max_input_tokens": 131072, "max_output_tokens": 131072, - "max_tokens": 131072, - "mode": "chat", + "input_cost_per_token": 2e-08, "output_cost_per_token": 4e-08, + "litellm_provider": "deepinfra", + "mode": "chat", "supports_tool_choice": true }, "deepinfra/mistralai/Mistral-Small-24B-Instruct-2501": { - "input_cost_per_token": 5e-08, - "litellm_provider": "deepinfra", + "max_tokens": 32768, "max_input_tokens": 32768, "max_output_tokens": 32768, - "max_tokens": 32768, - "mode": "chat", + "input_cost_per_token": 5e-08, "output_cost_per_token": 8e-08, + "litellm_provider": "deepinfra", + "mode": "chat", "supports_tool_choice": true }, "deepinfra/mistralai/Mistral-Small-3.2-24B-Instruct-2506": { - "input_cost_per_token": 5e-08, - "litellm_provider": "deepinfra", + "max_tokens": 128000, "max_input_tokens": 128000, "max_output_tokens": 128000, - "max_tokens": 128000, + "input_cost_per_token": 7.5e-08, + "output_cost_per_token": 2e-07, + "litellm_provider": "deepinfra", "mode": "chat", - "output_cost_per_token": 1e-07, "supports_tool_choice": true }, "deepinfra/mistralai/Mixtral-8x7B-Instruct-v0.1": { - "input_cost_per_token": 8e-08, - "litellm_provider": "deepinfra", + "max_tokens": 32768, "max_input_tokens": 32768, "max_output_tokens": 32768, - "max_tokens": 32768, + "input_cost_per_token": 4e-07, + "output_cost_per_token": 4e-07, + "litellm_provider": "deepinfra", "mode": "chat", - "output_cost_per_token": 2.4e-07, "supports_tool_choice": true }, "deepinfra/moonshotai/Kimi-K2-Instruct": { - "input_cost_per_token": 5e-07, - "litellm_provider": "deepinfra", + "max_tokens": 131072, "max_input_tokens": 131072, "max_output_tokens": 131072, - "max_tokens": 131072, - "mode": "chat", + "input_cost_per_token": 5e-07, "output_cost_per_token": 2e-06, + "litellm_provider": "deepinfra", + "mode": "chat", + "supports_tool_choice": true + }, + "deepinfra/moonshotai/Kimi-K2-Instruct-0905": { + "max_tokens": 262144, + "max_input_tokens": 262144, + "max_output_tokens": 262144, + "input_cost_per_token": 5e-07, + "output_cost_per_token": 2e-06, + "cache_read_input_token_cost": 4e-07, + "litellm_provider": "deepinfra", + "mode": "chat", "supports_tool_choice": true }, "deepinfra/nvidia/Llama-3.1-Nemotron-70B-Instruct": { - "input_cost_per_token": 1.2e-07, - "litellm_provider": "deepinfra", + "max_tokens": 131072, "max_input_tokens": 131072, "max_output_tokens": 131072, - "max_tokens": 131072, + "input_cost_per_token": 6e-07, + "output_cost_per_token": 6e-07, + "litellm_provider": "deepinfra", + "mode": "chat", + "supports_tool_choice": true + }, + "deepinfra/nvidia/Llama-3.3-Nemotron-Super-49B-v1.5": { + "max_tokens": 131072, + "max_input_tokens": 131072, + "max_output_tokens": 131072, + "input_cost_per_token": 1e-07, + "output_cost_per_token": 4e-07, + "litellm_provider": "deepinfra", + "mode": "chat", + "supports_tool_choice": true + }, + "deepinfra/nvidia/NVIDIA-Nemotron-Nano-9B-v2": { + "max_tokens": 131072, + "max_input_tokens": 131072, + "max_output_tokens": 131072, + "input_cost_per_token": 4e-08, + "output_cost_per_token": 1.6e-07, + "litellm_provider": "deepinfra", "mode": "chat", - "output_cost_per_token": 3e-07, "supports_tool_choice": true }, "deepinfra/openai/gpt-oss-120b": { - "input_cost_per_token": 9e-08, - "litellm_provider": "deepinfra", + "max_tokens": 131072, "max_input_tokens": 131072, "max_output_tokens": 131072, - "max_tokens": 131072, - "mode": "chat", + "input_cost_per_token": 5e-08, "output_cost_per_token": 4.5e-07, + "litellm_provider": "deepinfra", + "mode": "chat", "supports_tool_choice": true }, "deepinfra/openai/gpt-oss-20b": { - "input_cost_per_token": 4e-08, - "litellm_provider": "deepinfra", + "max_tokens": 131072, "max_input_tokens": 131072, "max_output_tokens": 131072, - "max_tokens": 131072, + "input_cost_per_token": 4e-08, + "output_cost_per_token": 1.5e-07, + "litellm_provider": "deepinfra", "mode": "chat", - "output_cost_per_token": 1.6e-07, "supports_tool_choice": true }, "deepinfra/zai-org/GLM-4.5": { - "input_cost_per_token": 5.5e-07, - "litellm_provider": "deepinfra", + "max_tokens": 131072, "max_input_tokens": 131072, "max_output_tokens": 131072, - "max_tokens": 131072, - "mode": "chat", - "output_cost_per_token": 2e-06, - "supports_tool_choice": true - }, - "deepinfra/zai-org/GLM-4.5-Air": { - "input_cost_per_token": 2e-07, + "input_cost_per_token": 4e-07, + "output_cost_per_token": 1.6e-06, "litellm_provider": "deepinfra", - "max_input_tokens": 131072, - "max_output_tokens": 131072, - "max_tokens": 131072, "mode": "chat", - "output_cost_per_token": 1.1e-06, "supports_tool_choice": true }, "deepseek/deepseek-chat": { @@ -7550,6 +7840,25 @@ "supports_response_schema": true, "supports_tool_choice": true }, + "eu.anthropic.claude-haiku-4-5-20251001-v1:0": { + "cache_creation_input_token_cost": 1.25e-06, + "cache_read_input_token_cost": 1e-07, + "input_cost_per_token": 1e-06, + "litellm_provider": "bedrock", + "max_input_tokens": 200000, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 5e-06, + "source": "https://aws.amazon.com/about-aws/whats-new/2025/10/claude-4-5-haiku-anthropic-amazon-bedrock", + "supports_assistant_prefill": true, + "supports_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_tool_choice": true + }, "eu.anthropic.claude-3-5-sonnet-20240620-v1:0": { "input_cost_per_token": 3e-06, "litellm_provider": "bedrock", @@ -7722,6 +8031,36 @@ "supports_vision": true, "tool_use_system_prompt_tokens": 159 }, + "eu.anthropic.claude-sonnet-4-5-20250929-v1:0": { + "cache_creation_input_token_cost": 4.125e-06, + "cache_read_input_token_cost": 3.3e-07, + "input_cost_per_token": 3.3e-06, + "input_cost_per_token_above_200k_tokens": 6.6e-06, + "output_cost_per_token_above_200k_tokens": 2.475e-05, + "cache_creation_input_token_cost_above_200k_tokens": 8.25e-06, + "cache_read_input_token_cost_above_200k_tokens": 6.6e-07, + "litellm_provider": "bedrock_converse", + "max_input_tokens": 200000, + "max_output_tokens": 64000, + "max_tokens": 200000, + "mode": "chat", + "output_cost_per_token": 1.65e-05, + "search_context_cost_per_query": { + "search_context_size_high": 0.01, + "search_context_size_low": 0.01, + "search_context_size_medium": 0.01 + }, + "supports_assistant_prefill": true, + "supports_computer_use": true, + "supports_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true, + "tool_use_system_prompt_tokens": 346 + }, "eu.meta.llama3-2-1b-instruct-v1:0": { "input_cost_per_token": 1.3e-07, "litellm_provider": "bedrock", @@ -9265,6 +9604,54 @@ "supports_vision": true, "supports_web_search": true }, + "gemini-2.5-flash-image": { + "cache_read_input_token_cost": 3e-08, + "input_cost_per_audio_token": 1e-06, + "input_cost_per_token": 3e-07, + "litellm_provider": "vertex_ai-language-models", + "max_audio_length_hours": 8.4, + "max_audio_per_prompt": 1, + "max_images_per_prompt": 3000, + "max_input_tokens": 32768, + "max_output_tokens": 32768, + "max_tokens": 32768, + "max_pdf_size_mb": 30, + "max_video_length": 1, + "max_videos_per_prompt": 10, + "mode": "image_generation", + "output_cost_per_image": 0.039, + "output_cost_per_reasoning_token": 2.5e-06, + "output_cost_per_token": 2.5e-06, + "rpm": 100000, + "source": "https://ai.google.dev/gemini-api/docs/pricing#gemini-2.5-flash-image", + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/completions", + "/v1/batch" + ], + "supported_modalities": [ + "text", + "image", + "audio", + "video" + ], + "supported_output_modalities": [ + "text", + "image" + ], + "supports_audio_output": false, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_url_context": true, + "supports_vision": true, + "supports_web_search": true, + "tpm": 8000000 + }, "gemini-2.5-flash-image-preview": { "cache_read_input_token_cost": 7.5e-08, "input_cost_per_audio_token": 1e-06, @@ -9448,96 +9835,6 @@ "supports_vision": true, "supports_web_search": true }, - "gemini-flash-latest": { - "cache_read_input_token_cost": 7.5e-08, - "input_cost_per_audio_token": 1e-06, - "input_cost_per_token": 3e-07, - "litellm_provider": "vertex_ai-language-models", - "max_audio_length_hours": 8.4, - "max_audio_per_prompt": 1, - "max_images_per_prompt": 3000, - "max_input_tokens": 1048576, - "max_output_tokens": 65535, - "max_pdf_size_mb": 30, - "max_tokens": 65535, - "max_video_length": 1, - "max_videos_per_prompt": 10, - "mode": "chat", - "output_cost_per_reasoning_token": 2.5e-06, - "output_cost_per_token": 2.5e-06, - "source": "https://developers.googleblog.com/en/continuing-to-bring-you-our-latest-models-with-an-improved-gemini-2-5-flash-and-flash-lite-release/", - "supported_endpoints": [ - "/v1/chat/completions", - "/v1/completions", - "/v1/batch" - ], - "supported_modalities": [ - "text", - "image", - "audio", - "video" - ], - "supported_output_modalities": [ - "text" - ], - "supports_audio_output": false, - "supports_function_calling": true, - "supports_parallel_function_calling": true, - "supports_pdf_input": true, - "supports_prompt_caching": true, - "supports_reasoning": true, - "supports_response_schema": true, - "supports_system_messages": true, - "supports_tool_choice": true, - "supports_url_context": true, - "supports_vision": true, - "supports_web_search": true - }, - "gemini-flash-lite-latest": { - "cache_read_input_token_cost": 2.5e-08, - "input_cost_per_audio_token": 3e-07, - "input_cost_per_token": 1e-07, - "litellm_provider": "vertex_ai-language-models", - "max_audio_length_hours": 8.4, - "max_audio_per_prompt": 1, - "max_images_per_prompt": 3000, - "max_input_tokens": 1048576, - "max_output_tokens": 65535, - "max_pdf_size_mb": 30, - "max_tokens": 65535, - "max_video_length": 1, - "max_videos_per_prompt": 10, - "mode": "chat", - "output_cost_per_reasoning_token": 4e-07, - "output_cost_per_token": 4e-07, - "source": "https://developers.googleblog.com/en/continuing-to-bring-you-our-latest-models-with-an-improved-gemini-2-5-flash-and-flash-lite-release/", - "supported_endpoints": [ - "/v1/chat/completions", - "/v1/completions", - "/v1/batch" - ], - "supported_modalities": [ - "text", - "image", - "audio", - "video" - ], - "supported_output_modalities": [ - "text" - ], - "supports_audio_output": false, - "supports_function_calling": true, - "supports_parallel_function_calling": true, - "supports_pdf_input": true, - "supports_prompt_caching": true, - "supports_reasoning": true, - "supports_response_schema": true, - "supports_system_messages": true, - "supports_tool_choice": true, - "supports_url_context": true, - "supports_vision": true, - "supports_web_search": true - }, "gemini-2.5-flash-lite-preview-06-17": { "cache_read_input_token_cost": 2.5e-08, "input_cost_per_audio_token": 5e-07, @@ -10808,6 +11105,54 @@ "supports_web_search": true, "tpm": 8000000 }, + "gemini/gemini-2.5-flash-image": { + "cache_read_input_token_cost": 3e-08, + "input_cost_per_audio_token": 1e-06, + "input_cost_per_token": 3e-07, + "litellm_provider": "vertex_ai-language-models", + "max_audio_length_hours": 8.4, + "max_audio_per_prompt": 1, + "max_images_per_prompt": 3000, + "max_input_tokens": 32768, + "max_output_tokens": 32768, + "max_tokens": 32768, + "max_pdf_size_mb": 30, + "max_video_length": 1, + "max_videos_per_prompt": 10, + "mode": "image_generation", + "output_cost_per_image": 0.039, + "output_cost_per_reasoning_token": 2.5e-06, + "output_cost_per_token": 2.5e-06, + "rpm": 100000, + "source": "https://ai.google.dev/gemini-api/docs/pricing#gemini-2.5-flash-image", + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/completions", + "/v1/batch" + ], + "supported_modalities": [ + "text", + "image", + "audio", + "video" + ], + "supported_output_modalities": [ + "text", + "image" + ], + "supports_audio_output": false, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_url_context": true, + "supports_vision": true, + "supports_web_search": true, + "tpm": 8000000 + }, "gemini/gemini-2.5-flash-image-preview": { "cache_read_input_token_cost": 7.5e-08, "input_cost_per_audio_token": 1e-06, @@ -11765,6 +12110,66 @@ "video" ] }, + "global.anthropic.claude-sonnet-4-5-20250929-v1:0": { + "cache_creation_input_token_cost": 3.75e-06, + "cache_read_input_token_cost": 3e-07, + "input_cost_per_token": 3e-06, + "input_cost_per_token_above_200k_tokens": 6e-06, + "output_cost_per_token_above_200k_tokens": 2.25e-05, + "cache_creation_input_token_cost_above_200k_tokens": 7.5e-06, + "cache_read_input_token_cost_above_200k_tokens": 6e-07, + "litellm_provider": "bedrock_converse", + "max_input_tokens": 200000, + "max_output_tokens": 64000, + "max_tokens": 200000, + "mode": "chat", + "output_cost_per_token": 1.5e-05, + "search_context_cost_per_query": { + "search_context_size_high": 0.01, + "search_context_size_low": 0.01, + "search_context_size_medium": 0.01 + }, + "supports_assistant_prefill": true, + "supports_computer_use": true, + "supports_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true, + "tool_use_system_prompt_tokens": 346 + }, + "global.anthropic.claude-sonnet-4-20250514-v1:0": { + "cache_creation_input_token_cost": 3.75e-06, + "cache_read_input_token_cost": 3e-07, + "input_cost_per_token": 3e-06, + "input_cost_per_token_above_200k_tokens": 6e-06, + "output_cost_per_token_above_200k_tokens": 2.25e-05, + "cache_creation_input_token_cost_above_200k_tokens": 7.5e-06, + "cache_read_input_token_cost_above_200k_tokens": 6e-07, + "litellm_provider": "bedrock_converse", + "max_input_tokens": 1000000, + "max_output_tokens": 64000, + "max_tokens": 64000, + "mode": "chat", + "output_cost_per_token": 1.5e-05, + "search_context_cost_per_query": { + "search_context_size_high": 0.01, + "search_context_size_low": 0.01, + "search_context_size_medium": 0.01 + }, + "supports_assistant_prefill": true, + "supports_computer_use": true, + "supports_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true, + "tool_use_system_prompt_tokens": 159 + }, "gpt-3.5-turbo": { "input_cost_per_token": 0.5e-06, "litellm_provider": "openai", @@ -12817,6 +13222,72 @@ "supports_tool_choice": true, "supports_vision": true }, + "gpt-5-pro": { + "input_cost_per_token": 1.5e-05, + "input_cost_per_token_batches": 7.5e-06, + "litellm_provider": "openai", + "max_input_tokens": 400000, + "max_output_tokens": 272000, + "max_tokens": 272000, + "mode": "responses", + "output_cost_per_token": 1.2e-04, + "output_cost_per_token_batches": 6e-05, + "supported_endpoints": [ + "/v1/batch", + "/v1/responses" + ], + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "text" + ], + "supports_function_calling": true, + "supports_native_streaming": false, + "supports_parallel_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_vision": true, + "supports_web_search": true + }, + "gpt-5-pro-2025-10-06": { + "input_cost_per_token": 1.5e-05, + "input_cost_per_token_batches": 7.5e-06, + "litellm_provider": "openai", + "max_input_tokens": 400000, + "max_output_tokens": 272000, + "max_tokens": 272000, + "mode": "responses", + "output_cost_per_token": 1.2e-04, + "output_cost_per_token_batches": 6e-05, + "supported_endpoints": [ + "/v1/batch", + "/v1/responses" + ], + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "text" + ], + "supports_function_calling": true, + "supports_native_streaming": false, + "supports_parallel_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_vision": true, + "supports_web_search": true + }, "gpt-5-2025-08-07": { "cache_read_input_token_cost": 1.25e-07, "cache_read_input_token_cost_flex": 6.25e-08, @@ -12892,9 +13363,9 @@ "cache_read_input_token_cost": 1.25e-07, "input_cost_per_token": 1.25e-06, "litellm_provider": "openai", - "max_input_tokens": 272000, - "max_output_tokens": 128000, - "max_tokens": 128000, + "max_input_tokens": 128000, + "max_output_tokens": 16384, + "max_tokens": 16384, "mode": "chat", "output_cost_per_token": 1e-05, "supported_endpoints": [ @@ -13106,6 +13577,19 @@ "/v1/images/generations" ] }, + "gpt-image-1-mini": { + "cache_read_input_image_token_cost": 2.5e-07, + "cache_read_input_token_cost": 2e-07, + "input_cost_per_image_token": 2.5e-06, + "input_cost_per_token": 2e-06, + "litellm_provider": "openai", + "mode": "chat", + "output_cost_per_image_token": 8e-06, + "supported_endpoints": [ + "/v1/images/generations", + "/v1/images/edits" + ] + }, "gpt-realtime": { "cache_creation_input_audio_token_cost": 4e-07, "cache_read_input_token_cost": 4e-07, @@ -13138,6 +13622,37 @@ "supports_system_messages": true, "supports_tool_choice": true }, + "gpt-realtime-mini": { + "cache_creation_input_audio_token_cost": 3e-07, + "cache_read_input_audio_token_cost": 3e-07, + "input_cost_per_audio_token": 1e-05, + "input_cost_per_token": 6e-07, + "litellm_provider": "openai", + "max_input_tokens": 128000, + "max_output_tokens": 4096, + "max_tokens": 4096, + "mode": "chat", + "output_cost_per_audio_token": 2e-05, + "output_cost_per_token": 2.4e-06, + "supported_endpoints": [ + "/v1/realtime" + ], + "supported_modalities": [ + "text", + "image", + "audio" + ], + "supported_output_modalities": [ + "text", + "audio" + ], + "supports_audio_input": true, + "supports_audio_output": true, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_system_messages": true, + "supports_tool_choice": true + }, "gpt-realtime-2025-08-28": { "cache_creation_input_audio_token_cost": 4e-07, "cache_read_input_token_cost": 4e-07, @@ -13346,6 +13861,18 @@ ], "supports_tool_choice": false }, + "lemonade/Qwen3-Coder-30B-A3B-Instruct-GGUF": { + "input_cost_per_token": 0, + "litellm_provider": "lemonade", + "max_tokens": 32768, + "max_input_tokens": 32768, + "max_output_tokens": 32768, + "mode": "chat", + "output_cost_per_token": 0, + "supports_function_calling": true, + "supports_response_schema": true, + "supports_tool_choice": true + }, "groq/deepseek-r1-distill-llama-70b": { "input_cost_per_token": 7.5e-07, "litellm_provider": "groq", @@ -13635,6 +14162,19 @@ "supports_response_schema": true, "supports_tool_choice": true }, + "groq/moonshotai/kimi-k2-instruct-0905": { + "input_cost_per_token": 1e-06, + "output_cost_per_token": 3e-06, + "cache_read_input_token_cost": 0.5e-06, + "litellm_provider": "groq", + "max_input_tokens": 262144, + "max_output_tokens": 16384, + "max_tokens": 278528, + "mode": "chat", + "supports_function_calling": true, + "supports_response_schema": true, + "supports_tool_choice": true + }, "groq/openai/gpt-oss-120b": { "input_cost_per_token": 1.5e-07, "litellm_provider": "groq", @@ -14110,6 +14650,36 @@ "mode": "rerank", "output_cost_per_token": 1.8e-08 }, + "jp.anthropic.claude-sonnet-4-5-20250929-v1:0": { + "cache_creation_input_token_cost": 4.125e-06, + "cache_read_input_token_cost": 3.3e-07, + "input_cost_per_token": 3.3e-06, + "input_cost_per_token_above_200k_tokens": 6.6e-06, + "output_cost_per_token_above_200k_tokens": 2.475e-05, + "cache_creation_input_token_cost_above_200k_tokens": 8.25e-06, + "cache_read_input_token_cost_above_200k_tokens": 6.6e-07, + "litellm_provider": "bedrock_converse", + "max_input_tokens": 200000, + "max_output_tokens": 64000, + "max_tokens": 200000, + "mode": "chat", + "output_cost_per_token": 1.65e-05, + "search_context_cost_per_query": { + "search_context_size_high": 0.01, + "search_context_size_low": 0.01, + "search_context_size_medium": 0.01 + }, + "supports_assistant_prefill": true, + "supports_computer_use": true, + "supports_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_vision": true, + "tool_use_system_prompt_tokens": 346 + }, "lambda_ai/deepseek-llama3.3-70b": { "input_cost_per_token": 2e-07, "litellm_provider": "lambda_ai", @@ -14485,6 +15055,54 @@ "/v1/images/generations" ] }, + "low/1024-x-1024/gpt-image-1-mini": { + "input_cost_per_image": 0.005, + "litellm_provider": "openai", + "mode": "image_generation", + "supported_endpoints": [ + "/v1/images/generations" + ] + }, + "low/1024-x-1536/gpt-image-1-mini": { + "input_cost_per_image": 0.006, + "litellm_provider": "openai", + "mode": "image_generation", + "supported_endpoints": [ + "/v1/images/generations" + ] + }, + "low/1536-x-1024/gpt-image-1-mini": { + "input_cost_per_image": 0.006, + "litellm_provider": "openai", + "mode": "image_generation", + "supported_endpoints": [ + "/v1/images/generations" + ] + }, + "medium/1024-x-1024/gpt-image-1-mini": { + "input_cost_per_image": 0.011, + "litellm_provider": "openai", + "mode": "image_generation", + "supported_endpoints": [ + "/v1/images/generations" + ] + }, + "medium/1024-x-1536/gpt-image-1-mini": { + "input_cost_per_image": 0.015, + "litellm_provider": "openai", + "mode": "image_generation", + "supported_endpoints": [ + "/v1/images/generations" + ] + }, + "medium/1536-x-1024/gpt-image-1-mini": { + "input_cost_per_image": 0.015, + "litellm_provider": "openai", + "mode": "image_generation", + "supported_endpoints": [ + "/v1/images/generations" + ] + }, "medlm-large": { "input_cost_per_character": 5e-06, "litellm_provider": "vertex_ai-language-models", @@ -16231,6 +16849,42 @@ "supports_function_calling": true, "supports_response_schema": false }, + "oci/cohere.command-latest": { + "input_cost_per_token": 1.56e-06, + "litellm_provider": "oci", + "max_input_tokens": 128000, + "max_output_tokens": 4000, + "max_tokens": 128000, + "mode": "chat", + "output_cost_per_token": 1.56e-06, + "source": "https://www.oracle.com/cloud/ai/generative-ai/pricing/", + "supports_function_calling": true, + "supports_response_schema": false + }, + "oci/cohere.command-a-03-2025": { + "input_cost_per_token": 1.56e-06, + "litellm_provider": "oci", + "max_input_tokens": 256000, + "max_output_tokens": 4000, + "max_tokens": 256000, + "mode": "chat", + "output_cost_per_token": 1.56e-06, + "source": "https://www.oracle.com/cloud/ai/generative-ai/pricing/", + "supports_function_calling": true, + "supports_response_schema": false + }, + "oci/cohere.command-plus-latest": { + "input_cost_per_token": 1.56e-06, + "litellm_provider": "oci", + "max_input_tokens": 128000, + "max_output_tokens": 4000, + "max_tokens": 128000, + "mode": "chat", + "output_cost_per_token": 1.56e-06, + "source": "https://www.oracle.com/cloud/ai/generative-ai/pricing/", + "supports_function_calling": true, + "supports_response_schema": false + }, "ollama/codegeex4": { "input_cost_per_token": 0.0, "litellm_provider": "ollama", @@ -16764,6 +17418,25 @@ "supports_vision": true, "tool_use_system_prompt_tokens": 159 }, + "openrouter/anthropic/claude-sonnet-4.5": { + "input_cost_per_image": 0.0048, + "input_cost_per_token": 3e-06, + "input_cost_per_token_above_200k_tokens": 6e-06, + "output_cost_per_token_above_200k_tokens": 2.25e-05, + "litellm_provider": "openrouter", + "max_input_tokens": 1000000, + "max_output_tokens": 1000000, + "max_tokens": 1000000, + "mode": "chat", + "output_cost_per_token": 1.5e-05, + "supports_assistant_prefill": true, + "supports_computer_use": true, + "supports_function_calling": true, + "supports_reasoning": true, + "supports_tool_choice": true, + "supports_vision": true, + "tool_use_system_prompt_tokens": 159 + }, "openrouter/bytedance/ui-tars-1.5-7b": { "input_cost_per_token": 1e-07, "litellm_provider": "openrouter", @@ -18384,6 +19057,20 @@ "mode": "rerank", "output_cost_per_token": 0.0 }, + "nvidia_nim/nvidia/nv-rerankqa-mistral-4b-v3": { + "input_cost_per_query": 0.0, + "input_cost_per_token": 0.0, + "litellm_provider": "nvidia_nim", + "mode": "rerank", + "output_cost_per_token": 0.0 + }, + "nvidia_nim/nvidia/llama-3_2-nv-rerankqa-1b-v2": { + "input_cost_per_query": 0.0, + "input_cost_per_token": 0.0, + "litellm_provider": "nvidia_nim", + "mode": "rerank", + "output_cost_per_token": 0.0 + }, "sagemaker/meta-textgeneration-llama-2-13b": { "input_cost_per_token": 0.0, "litellm_provider": "sagemaker", @@ -19228,6 +19915,22 @@ "mode": "embedding", "output_cost_per_token": 0.0 }, + "together_ai/baai/bge-base-en-v1.5": { + "input_cost_per_token": 8e-09, + "litellm_provider": "together_ai", + "max_input_tokens": 512, + "mode": "embedding", + "output_cost_per_token": 0.0, + "output_vector_size": 768 + }, + "together_ai/BAAI/bge-base-en-v1.5": { + "input_cost_per_token": 8e-09, + "litellm_provider": "together_ai", + "max_input_tokens": 512, + "mode": "embedding", + "output_cost_per_token": 0.0, + "output_vector_size": 768 + }, "together-ai-up-to-4b": { "input_cost_per_token": 1e-07, "litellm_provider": "together_ai", @@ -19488,6 +20191,39 @@ "supports_parallel_function_calling": true, "supports_tool_choice": true }, + "together_ai/moonshotai/Kimi-K2-Instruct-0905": { + "input_cost_per_token": 1e-06, + "litellm_provider": "together_ai", + "max_input_tokens": 262144, + "mode": "chat", + "output_cost_per_token": 3e-06, + "source": "https://www.together.ai/models/kimi-k2-0905", + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_tool_choice": true + }, + "together_ai/Qwen/Qwen3-Next-80B-A3B-Instruct": { + "input_cost_per_token": 1.5e-07, + "litellm_provider": "together_ai", + "max_input_tokens": 262144, + "mode": "chat", + "output_cost_per_token": 1.5e-06, + "source": "https://www.together.ai/models/qwen3-next-80b-a3b-instruct", + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_tool_choice": true + }, + "together_ai/Qwen/Qwen3-Next-80B-A3B-Thinking": { + "input_cost_per_token": 1.5e-07, + "litellm_provider": "together_ai", + "max_input_tokens": 262144, + "mode": "chat", + "output_cost_per_token": 1.5e-06, + "source": "https://www.together.ai/models/qwen3-next-80b-a3b-thinking", + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_tool_choice": true + }, "tts-1": { "input_cost_per_character": 1.5e-05, "litellm_provider": "openai", @@ -19575,6 +20311,25 @@ "supports_response_schema": true, "supports_tool_choice": true }, + "us.anthropic.claude-haiku-4-5-20251001-v1:0": { + "cache_creation_input_token_cost": 1.25e-06, + "cache_read_input_token_cost": 1e-07, + "input_cost_per_token": 1e-06, + "litellm_provider": "bedrock", + "max_input_tokens": 200000, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 5e-06, + "source": "https://aws.amazon.com/about-aws/whats-new/2025/10/claude-4-5-haiku-anthropic-amazon-bedrock", + "supports_assistant_prefill": true, + "supports_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_tool_choice": true + }, "us.anthropic.claude-3-5-sonnet-20240620-v1:0": { "input_cost_per_token": 3e-06, "litellm_provider": "bedrock", @@ -19696,15 +20451,19 @@ "tool_use_system_prompt_tokens": 159 }, "us.anthropic.claude-sonnet-4-5-20250929-v1:0": { - "cache_creation_input_token_cost": 3.75e-06, - "cache_read_input_token_cost": 3e-07, - "input_cost_per_token": 3e-06, + "cache_creation_input_token_cost": 4.125e-06, + "cache_read_input_token_cost": 3.3e-07, + "input_cost_per_token": 3.3e-06, + "input_cost_per_token_above_200k_tokens": 6.6e-06, + "output_cost_per_token_above_200k_tokens": 2.475e-05, + "cache_creation_input_token_cost_above_200k_tokens": 8.25e-06, + "cache_read_input_token_cost_above_200k_tokens": 6.6e-07, "litellm_provider": "bedrock_converse", "max_input_tokens": 200000, "max_output_tokens": 64000, "max_tokens": 200000, "mode": "chat", - "output_cost_per_token": 1.5e-05, + "output_cost_per_token": 1.65e-05, "search_context_cost_per_query": { "search_context_size_high": 0.01, "search_context_size_low": 0.01, @@ -20842,6 +21601,25 @@ "supports_pdf_input": true, "supports_tool_choice": true }, + "vertex_ai/claude-haiku-4-5@20251001": { + "cache_creation_input_token_cost": 1.25e-06, + "cache_read_input_token_cost": 1e-07, + "input_cost_per_token": 1e-06, + "litellm_provider": "vertex_ai-anthropic_models", + "max_input_tokens": 200000, + "max_output_tokens": 8192, + "max_tokens": 8192, + "mode": "chat", + "output_cost_per_token": 5e-06, + "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/partner-models/claude/haiku-4-5", + "supports_assistant_prefill": true, + "supports_function_calling": true, + "supports_pdf_input": true, + "supports_prompt_caching": true, + "supports_reasoning": true, + "supports_response_schema": true, + "supports_tool_choice": true + }, "vertex_ai/claude-3-5-sonnet": { "input_cost_per_token": 3e-06, "litellm_provider": "vertex_ai-anthropic_models", @@ -21065,6 +21843,10 @@ "cache_creation_input_token_cost": 3.75e-06, "cache_read_input_token_cost": 3e-07, "input_cost_per_token": 3e-06, + "input_cost_per_token_above_200k_tokens": 6e-06, + "output_cost_per_token_above_200k_tokens": 2.25e-05, + "cache_creation_input_token_cost_above_200k_tokens": 7.5e-06, + "cache_read_input_token_cost_above_200k_tokens": 6e-07, "input_cost_per_token_batches": 1.5e-06, "litellm_provider": "vertex_ai-anthropic_models", "max_input_tokens": 200000, @@ -21087,6 +21869,10 @@ "cache_creation_input_token_cost": 3.75e-06, "cache_read_input_token_cost": 3e-07, "input_cost_per_token": 3e-06, + "input_cost_per_token_above_200k_tokens": 6e-06, + "output_cost_per_token_above_200k_tokens": 2.25e-05, + "cache_creation_input_token_cost_above_200k_tokens": 7.5e-06, + "cache_read_input_token_cost_above_200k_tokens": 6e-07, "input_cost_per_token_batches": 1.5e-06, "litellm_provider": "vertex_ai-anthropic_models", "max_input_tokens": 200000, @@ -22009,6 +22795,307 @@ "supports_tool_choice": true, "supports_vision": false }, + "watsonx/bigscience/mt0-xxl-13b": { + "max_tokens": 8192, + "max_input_tokens": 8192, + "max_output_tokens": 8192, + "input_cost_per_token": 0.0005, + "output_cost_per_token": 0.002, + "litellm_provider": "watsonx", + "mode": "chat", + "supports_function_calling": false, + "supports_parallel_function_calling": false, + "supports_vision": false + }, + "watsonx/core42/jais-13b-chat": { + "max_tokens": 8192, + "max_input_tokens": 8192, + "max_output_tokens": 8192, + "input_cost_per_token": 0.0005, + "output_cost_per_token": 0.002, + "litellm_provider": "watsonx", + "mode": "chat", + "supports_function_calling": false, + "supports_parallel_function_calling": false, + "supports_vision": false + }, + "watsonx/google/flan-t5-xl-3b": { + "max_tokens": 8192, + "max_input_tokens": 8192, + "max_output_tokens": 8192, + "input_cost_per_token": 0.0001, + "output_cost_per_token": 0.00025, + "litellm_provider": "watsonx", + "mode": "chat", + "supports_function_calling": false, + "supports_parallel_function_calling": false, + "supports_vision": false + }, + "watsonx/ibm/granite-13b-chat-v2": { + "max_tokens": 8192, + "max_input_tokens": 8192, + "max_output_tokens": 8192, + "input_cost_per_token": 0.0005, + "output_cost_per_token": 0.002, + "litellm_provider": "watsonx", + "mode": "chat", + "supports_function_calling": false, + "supports_parallel_function_calling": false, + "supports_vision": false + }, + "watsonx/ibm/granite-13b-instruct-v2": { + "max_tokens": 8192, + "max_input_tokens": 8192, + "max_output_tokens": 8192, + "input_cost_per_token": 0.0005, + "output_cost_per_token": 0.002, + "litellm_provider": "watsonx", + "mode": "chat", + "supports_function_calling": false, + "supports_parallel_function_calling": false, + "supports_vision": false + }, + "watsonx/ibm/granite-3-3-8b-instruct": { + "max_tokens": 8192, + "max_input_tokens": 8192, + "max_output_tokens": 8192, + "input_cost_per_token": 0.00025, + "output_cost_per_token": 0.001, + "litellm_provider": "watsonx", + "mode": "chat", + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_vision": false + }, + "watsonx/ibm/granite-4-h-small": { + "max_tokens": 20480, + "max_input_tokens": 20480, + "max_output_tokens": 20480, + "input_cost_per_token": 0.000625, + "output_cost_per_token": 0.0025, + "litellm_provider": "watsonx", + "mode": "chat", + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_vision": false + }, + "watsonx/ibm/granite-guardian-3-2-2b": { + "max_tokens": 8192, + "max_input_tokens": 8192, + "max_output_tokens": 8192, + "input_cost_per_token": 0.00015, + "output_cost_per_token": 0.0006, + "litellm_provider": "watsonx", + "mode": "chat", + "supports_function_calling": false, + "supports_parallel_function_calling": false, + "supports_vision": false + }, + "watsonx/ibm/granite-guardian-3-3-8b": { + "max_tokens": 8192, + "max_input_tokens": 8192, + "max_output_tokens": 8192, + "input_cost_per_token": 0.00025, + "output_cost_per_token": 0.001, + "litellm_provider": "watsonx", + "mode": "chat", + "supports_function_calling": false, + "supports_parallel_function_calling": false, + "supports_vision": false + }, + "watsonx/ibm/granite-ttm-1024-96-r2": { + "max_tokens": 512, + "max_input_tokens": 512, + "max_output_tokens": 512, + "input_cost_per_token": 0.000625, + "output_cost_per_token": 0.000625, + "litellm_provider": "watsonx", + "mode": "chat", + "supports_function_calling": false, + "supports_parallel_function_calling": false, + "supports_vision": false + }, + "watsonx/ibm/granite-ttm-1536-96-r2": { + "max_tokens": 512, + "max_input_tokens": 512, + "max_output_tokens": 512, + "input_cost_per_token": 0.000625, + "output_cost_per_token": 0.000625, + "litellm_provider": "watsonx", + "mode": "chat", + "supports_function_calling": false, + "supports_parallel_function_calling": false, + "supports_vision": false + }, + "watsonx/ibm/granite-ttm-512-96-r2": { + "max_tokens": 512, + "max_input_tokens": 512, + "max_output_tokens": 512, + "input_cost_per_token": 0.000625, + "output_cost_per_token": 0.000625, + "litellm_provider": "watsonx", + "mode": "chat", + "supports_function_calling": false, + "supports_parallel_function_calling": false, + "supports_vision": false + }, + "watsonx/ibm/granite-vision-3-2-2b": { + "max_tokens": 8192, + "max_input_tokens": 8192, + "max_output_tokens": 8192, + "input_cost_per_token": 0.00015, + "output_cost_per_token": 0.0006, + "litellm_provider": "watsonx", + "mode": "chat", + "supports_function_calling": false, + "supports_parallel_function_calling": false, + "supports_vision": true + }, + "watsonx/meta-llama/llama-3-2-11b-vision-instruct": { + "max_tokens": 128000, + "max_input_tokens": 128000, + "max_output_tokens": 128000, + "input_cost_per_token": 0.00025, + "output_cost_per_token": 0.001, + "litellm_provider": "watsonx", + "mode": "chat", + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_vision": true + }, + "watsonx/meta-llama/llama-3-2-1b-instruct": { + "max_tokens": 128000, + "max_input_tokens": 128000, + "max_output_tokens": 128000, + "input_cost_per_token": 0.0001, + "output_cost_per_token": 0.0002, + "litellm_provider": "watsonx", + "mode": "chat", + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_vision": false + }, + "watsonx/meta-llama/llama-3-2-3b-instruct": { + "max_tokens": 128000, + "max_input_tokens": 128000, + "max_output_tokens": 128000, + "input_cost_per_token": 0.00015, + "output_cost_per_token": 0.0006, + "litellm_provider": "watsonx", + "mode": "chat", + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_vision": false + }, + "watsonx/meta-llama/llama-3-2-90b-vision-instruct": { + "max_tokens": 128000, + "max_input_tokens": 128000, + "max_output_tokens": 128000, + "input_cost_per_token": 0.002, + "output_cost_per_token": 0.008, + "litellm_provider": "watsonx", + "mode": "chat", + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_vision": true + }, + "watsonx/meta-llama/llama-3-3-70b-instruct": { + "max_tokens": 128000, + "max_input_tokens": 128000, + "max_output_tokens": 128000, + "input_cost_per_token": 0.002, + "output_cost_per_token": 0.006, + "litellm_provider": "watsonx", + "mode": "chat", + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_vision": false + }, + "watsonx/meta-llama/llama-4-maverick-17b": { + "max_tokens": 128000, + "max_input_tokens": 128000, + "max_output_tokens": 128000, + "input_cost_per_token": 0.0005, + "output_cost_per_token": 0.002, + "litellm_provider": "watsonx", + "mode": "chat", + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_vision": false + }, + "watsonx/meta-llama/llama-guard-3-11b-vision": { + "max_tokens": 128000, + "max_input_tokens": 128000, + "max_output_tokens": 128000, + "input_cost_per_token": 0.00025, + "output_cost_per_token": 0.001, + "litellm_provider": "watsonx", + "mode": "chat", + "supports_function_calling": false, + "supports_parallel_function_calling": false, + "supports_vision": true + }, + "watsonx/mistralai/mistral-medium-2505": { + "max_tokens": 128000, + "max_input_tokens": 128000, + "max_output_tokens": 128000, + "input_cost_per_token": 0.00225, + "output_cost_per_token": 0.00675, + "litellm_provider": "watsonx", + "mode": "chat", + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_vision": false + }, + "watsonx/mistralai/mistral-small-2503": { + "max_tokens": 32000, + "max_input_tokens": 32000, + "max_output_tokens": 32000, + "input_cost_per_token": 0.0002, + "output_cost_per_token": 0.0006, + "litellm_provider": "watsonx", + "mode": "chat", + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_vision": false + }, + "watsonx/mistralai/pixtral-12b-2409": { + "max_tokens": 128000, + "max_input_tokens": 128000, + "max_output_tokens": 128000, + "input_cost_per_token": 0.00015, + "output_cost_per_token": 0.00015, + "litellm_provider": "watsonx", + "mode": "chat", + "supports_function_calling": false, + "supports_parallel_function_calling": false, + "supports_vision": true + }, + "watsonx/openai/gpt-oss-120b": { + "max_tokens": 8192, + "max_input_tokens": 8192, + "max_output_tokens": 8192, + "input_cost_per_token": 0.004, + "output_cost_per_token": 0.016, + "litellm_provider": "watsonx", + "mode": "chat", + "supports_function_calling": false, + "supports_parallel_function_calling": false, + "supports_vision": false + }, + "watsonx/sdaia/allam-1-13b-instruct": { + "max_tokens": 8192, + "max_input_tokens": 8192, + "max_output_tokens": 8192, + "input_cost_per_token": 0.0005, + "output_cost_per_token": 0.002, + "litellm_provider": "watsonx", + "mode": "chat", + "supports_function_calling": false, + "supports_parallel_function_calling": false, + "supports_vision": false + }, + "whisper-1": { "input_cost_per_second": 0.0001, "litellm_provider": "openai", diff --git a/model_prices_and_context_window.sha256 b/model_prices_and_context_window.sha256 index 1cd539c2..38e26a7f 100644 --- a/model_prices_and_context_window.sha256 +++ b/model_prices_and_context_window.sha256 @@ -1 +1 @@ -eb63a796c771cee4945cd2546a4bdb8fd9615a57a9ba3e29cf98b682db772ebb +3fa853a6cb4e9023c170a9c3fc9ad95dfb2cd6304b623bcedf49e3797af7d8b7