diff --git a/model_prices_and_context_window.json b/model_prices_and_context_window.json index 0b97d876..0005269a 100644 --- a/model_prices_and_context_window.json +++ b/model_prices_and_context_window.json @@ -1644,7 +1644,7 @@ "cache_read_input_token_cost": 1.4e-07, "input_cost_per_token": 1.38e-06, "litellm_provider": "azure", - "max_input_tokens": 272000, + "max_input_tokens": 128000, "max_output_tokens": 128000, "max_tokens": 128000, "mode": "chat", @@ -1944,7 +1944,7 @@ "cache_read_input_token_cost": 1.25e-07, "input_cost_per_token": 1.25e-06, "litellm_provider": "azure", - "max_input_tokens": 272000, + "max_input_tokens": 128000, "max_output_tokens": 128000, "max_tokens": 128000, "mode": "chat", @@ -2893,7 +2893,7 @@ "input_cost_per_token": 1.25e-06, "input_cost_per_token_priority": 2.5e-06, "litellm_provider": "azure", - "max_input_tokens": 272000, + "max_input_tokens": 400000, "max_output_tokens": 128000, "max_tokens": 128000, "mode": "chat", @@ -2964,7 +2964,7 @@ "input_cost_per_token": 1.25e-06, "input_cost_per_token_priority": 2.5e-06, "litellm_provider": "azure", - "max_input_tokens": 272000, + "max_input_tokens": 400000, "max_output_tokens": 128000, "max_tokens": 128000, "mode": "responses", @@ -2997,7 +2997,7 @@ "input_cost_per_token": 2.5e-07, "input_cost_per_token_priority": 4.5e-07, "litellm_provider": "azure", - "max_input_tokens": 272000, + "max_input_tokens": 400000, "max_output_tokens": 128000, "max_tokens": 128000, "mode": "responses", @@ -3092,9 +3092,9 @@ "cache_read_input_token_cost": 1.25e-07, "input_cost_per_token": 1.25e-06, "litellm_provider": "azure", - "max_input_tokens": 272000, - "max_output_tokens": 128000, - "max_tokens": 128000, + "max_input_tokens": 128000, + "max_output_tokens": 16384, + "max_tokens": 16384, "mode": "chat", "output_cost_per_token": 1e-05, "source": "https://azure.microsoft.com/en-us/blog/gpt-5-in-azure-ai-foundry-the-future-of-ai-apps-and-agents-starts-here/", @@ -3377,7 +3377,7 @@ "cache_read_input_token_cost": 1.25e-07, "input_cost_per_token": 1.25e-06, "litellm_provider": "azure", - "max_input_tokens": 272000, + "max_input_tokens": 128000, "max_output_tokens": 128000, "max_tokens": 128000, "mode": "chat", @@ -3440,7 +3440,7 @@ "cache_read_input_token_cost": 1.25e-07, "input_cost_per_token": 1.25e-06, "litellm_provider": "azure", - "max_input_tokens": 400000, + "max_input_tokens": 272000, "max_output_tokens": 128000, "max_tokens": 128000, "mode": "responses", @@ -3500,7 +3500,7 @@ "cache_read_input_token_cost": 1.75e-07, "input_cost_per_token": 1.75e-06, "litellm_provider": "azure", - "max_input_tokens": 400000, + "max_input_tokens": 272000, "max_output_tokens": 128000, "max_tokens": 128000, "mode": "chat", @@ -3637,7 +3637,7 @@ "azure/gpt-5.2-pro": { "input_cost_per_token": 2.1e-05, "litellm_provider": "azure", - "max_input_tokens": 400000, + "max_input_tokens": 272000, "max_output_tokens": 128000, "max_tokens": 128000, "mode": "responses", @@ -3668,7 +3668,7 @@ "azure/gpt-5.2-pro-2025-12-11": { "input_cost_per_token": 2.1e-05, "litellm_provider": "azure", - "max_input_tokens": 400000, + "max_input_tokens": 272000, "max_output_tokens": 128000, "max_tokens": 128000, "mode": "responses", @@ -4684,7 +4684,7 @@ "cache_read_input_token_cost": 1.4e-07, "input_cost_per_token": 1.38e-06, "litellm_provider": "azure", - "max_input_tokens": 272000, + "max_input_tokens": 128000, "max_output_tokens": 128000, "max_tokens": 128000, "mode": "chat", @@ -7813,14 +7813,14 @@ "supports_vision": true }, "deepseek-chat": { - "cache_read_input_token_cost": 6e-08, - "input_cost_per_token": 6e-07, + "cache_read_input_token_cost": 2.8e-08, + "input_cost_per_token": 2.8e-07, "litellm_provider": "deepseek", "max_input_tokens": 131072, "max_output_tokens": 8192, "max_tokens": 8192, "mode": "chat", - "output_cost_per_token": 1.7e-06, + "output_cost_per_token": 4.2e-07, "source": "https://api-docs.deepseek.com/quick_start/pricing", "supported_endpoints": [ "/v1/chat/completions" @@ -7834,14 +7834,14 @@ "supports_tool_choice": true }, "deepseek-reasoner": { - "cache_read_input_token_cost": 6e-08, - "input_cost_per_token": 6e-07, + "cache_read_input_token_cost": 2.8e-08, + "input_cost_per_token": 2.8e-07, "litellm_provider": "deepseek", "max_input_tokens": 131072, "max_output_tokens": 65536, "max_tokens": 65536, "mode": "chat", - "output_cost_per_token": 1.7e-06, + "output_cost_per_token": 4.2e-07, "source": "https://api-docs.deepseek.com/quick_start/pricing", "supported_endpoints": [ "/v1/chat/completions" @@ -10045,15 +10045,15 @@ }, "deepseek/deepseek-chat": { "cache_creation_input_token_cost": 0.0, - "cache_read_input_token_cost": 7e-08, - "input_cost_per_token": 2.7e-07, - "input_cost_per_token_cache_hit": 7e-08, + "cache_read_input_token_cost": 2.8e-08, + "input_cost_per_token": 2.8e-07, + "input_cost_per_token_cache_hit": 2.8e-08, "litellm_provider": "deepseek", - "max_input_tokens": 65536, + "max_input_tokens": 128000, "max_output_tokens": 8192, "max_tokens": 8192, "mode": "chat", - "output_cost_per_token": 1.1e-06, + "output_cost_per_token": 4.2e-07, "supports_assistant_prefill": true, "supports_function_calling": true, "supports_prompt_caching": true, @@ -10089,14 +10089,15 @@ "supports_tool_choice": true }, "deepseek/deepseek-reasoner": { - "input_cost_per_token": 5.5e-07, - "input_cost_per_token_cache_hit": 1.4e-07, + "cache_read_input_token_cost": 2.8e-08, + "input_cost_per_token": 2.8e-07, + "input_cost_per_token_cache_hit": 2.8e-08, "litellm_provider": "deepseek", - "max_input_tokens": 65536, + "max_input_tokens": 128000, "max_output_tokens": 8192, "max_tokens": 8192, "mode": "chat", - "output_cost_per_token": 2.19e-06, + "output_cost_per_token": 4.2e-07, "supports_assistant_prefill": true, "supports_function_calling": true, "supports_prompt_caching": true, @@ -17470,7 +17471,7 @@ "input_cost_per_token_flex": 6.25e-07, "input_cost_per_token_priority": 2.5e-06, "litellm_provider": "openai", - "max_input_tokens": 272000, + "max_input_tokens": 400000, "max_output_tokens": 128000, "max_tokens": 128000, "mode": "chat", @@ -17507,7 +17508,7 @@ "input_cost_per_token": 1.25e-06, "input_cost_per_token_priority": 2.5e-06, "litellm_provider": "openai", - "max_input_tokens": 272000, + "max_input_tokens": 400000, "max_output_tokens": 128000, "max_tokens": 128000, "mode": "chat", @@ -17543,7 +17544,7 @@ "input_cost_per_token": 1.25e-06, "input_cost_per_token_priority": 2.5e-06, "litellm_provider": "openai", - "max_input_tokens": 272000, + "max_input_tokens": 400000, "max_output_tokens": 128000, "max_tokens": 128000, "mode": "chat", @@ -17852,7 +17853,7 @@ "input_cost_per_token_flex": 6.25e-07, "input_cost_per_token_priority": 2.5e-06, "litellm_provider": "openai", - "max_input_tokens": 272000, + "max_input_tokens": 400000, "max_output_tokens": 128000, "max_tokens": 128000, "mode": "chat", @@ -17887,9 +17888,9 @@ "cache_read_input_token_cost": 1.25e-07, "input_cost_per_token": 1.25e-06, "litellm_provider": "openai", - "max_input_tokens": 272000, - "max_output_tokens": 128000, - "max_tokens": 128000, + "max_input_tokens": 128000, + "max_output_tokens": 16384, + "max_tokens": 16384, "mode": "chat", "output_cost_per_token": 1e-05, "supported_endpoints": [ @@ -17951,7 +17952,7 @@ "cache_read_input_token_cost": 1.25e-07, "input_cost_per_token": 1.25e-06, "litellm_provider": "openai", - "max_input_tokens": 272000, + "max_input_tokens": 400000, "max_output_tokens": 128000, "max_tokens": 128000, "mode": "responses", @@ -17983,7 +17984,7 @@ "input_cost_per_token": 1.25e-06, "input_cost_per_token_priority": 2.5e-06, "litellm_provider": "openai", - "max_input_tokens": 272000, + "max_input_tokens": 400000, "max_output_tokens": 128000, "max_tokens": 128000, "mode": "responses", @@ -18046,7 +18047,7 @@ "input_cost_per_token": 2.5e-07, "input_cost_per_token_priority": 4.5e-07, "litellm_provider": "openai", - "max_input_tokens": 272000, + "max_input_tokens": 400000, "max_output_tokens": 128000, "max_tokens": 128000, "mode": "responses", @@ -18081,7 +18082,7 @@ "input_cost_per_token_flex": 1.25e-07, "input_cost_per_token_priority": 4.5e-07, "litellm_provider": "openai", - "max_input_tokens": 272000, + "max_input_tokens": 400000, "max_output_tokens": 128000, "max_tokens": 128000, "mode": "chat", @@ -18120,7 +18121,7 @@ "input_cost_per_token_flex": 1.25e-07, "input_cost_per_token_priority": 4.5e-07, "litellm_provider": "openai", - "max_input_tokens": 272000, + "max_input_tokens": 400000, "max_output_tokens": 128000, "max_tokens": 128000, "mode": "chat", @@ -18158,7 +18159,7 @@ "input_cost_per_token_flex": 2.5e-08, "input_cost_per_token_priority": 2.5e-06, "litellm_provider": "openai", - "max_input_tokens": 272000, + "max_input_tokens": 400000, "max_output_tokens": 128000, "max_tokens": 128000, "mode": "chat", @@ -18193,7 +18194,7 @@ "input_cost_per_token": 5e-08, "input_cost_per_token_flex": 2.5e-08, "litellm_provider": "openai", - "max_input_tokens": 272000, + "max_input_tokens": 400000, "max_output_tokens": 128000, "max_tokens": 128000, "mode": "chat", @@ -23206,9 +23207,9 @@ "cache_read_input_token_cost": 1.25e-07, "input_cost_per_token": 1.25e-06, "litellm_provider": "openrouter", - "max_input_tokens": 272000, - "max_output_tokens": 128000, - "max_tokens": 128000, + "max_input_tokens": 128000, + "max_output_tokens": 16384, + "max_tokens": 16384, "mode": "chat", "output_cost_per_token": 1e-05, "supported_modalities": [ @@ -23225,7 +23226,7 @@ "cache_read_input_token_cost": 1.25e-07, "input_cost_per_token": 1.25e-06, "litellm_provider": "openrouter", - "max_input_tokens": 272000, + "max_input_tokens": 400000, "max_output_tokens": 128000, "max_tokens": 128000, "mode": "chat", @@ -23244,7 +23245,7 @@ "cache_read_input_token_cost": 1.25e-07, "input_cost_per_token": 1.25e-06, "litellm_provider": "openrouter", - "max_input_tokens": 272000, + "max_input_tokens": 400000, "max_output_tokens": 128000, "max_tokens": 128000, "mode": "chat", @@ -23263,7 +23264,7 @@ "cache_read_input_token_cost": 2.5e-08, "input_cost_per_token": 2.5e-07, "litellm_provider": "openrouter", - "max_input_tokens": 272000, + "max_input_tokens": 400000, "max_output_tokens": 128000, "max_tokens": 128000, "mode": "chat", @@ -23282,7 +23283,7 @@ "cache_read_input_token_cost": 5e-09, "input_cost_per_token": 5e-08, "litellm_provider": "openrouter", - "max_input_tokens": 272000, + "max_input_tokens": 400000, "max_output_tokens": 128000, "max_tokens": 128000, "mode": "chat", diff --git a/model_prices_and_context_window.sha256 b/model_prices_and_context_window.sha256 index 6aae8cc5..5f4e63a2 100644 --- a/model_prices_and_context_window.sha256 +++ b/model_prices_and_context_window.sha256 @@ -1 +1 @@ -f12fec3516e45adc1566fd305e063b03f3b3c2bcd97cb84be1a1c61f3df9d0ca +c2ad0cd96d378b8cf8782b84ad06de436d2fa92cbd74c46afd8862c9b577ea70