diff --git a/README.md b/README.md index 8c96ba88..951a8dde 100644 --- a/README.md +++ b/README.md @@ -478,6 +478,18 @@ claude gemini # 或其他 Gemini CLI 命令 ``` +**Codex 设置环境变量:** +```bash +export OPENAI_BASE_URL="http://127.0.0.1:3000/openai" # 根据实际填写你服务器的ip地址或者域名 +export OPENAI_API_KEY="后台创建的API密钥" # 使用后台创建的API密钥,格式如 cr_9022cccc8d42e94db4d6f6d27bc93a5d271be86cf86d4d167627eb31eb4492eb +``` + +**使用 Codex:** +```bash +# 配置环境变量后,即可正常使用支持 OpenAI API 的工具 +# 例如使用支持 OpenAI API 的代码补全工具等 +``` + ### 5. 第三方工具API接入 本服务支持多种API端点格式,方便接入不同的第三方工具(如Cherry Studio等): diff --git a/resources/model-pricing/README.md b/resources/model-pricing/README.md index 3f297bfa..e6cfac4e 100644 --- a/resources/model-pricing/README.md +++ b/resources/model-pricing/README.md @@ -34,4 +34,4 @@ The file contains JSON data with model pricing information including: - Context window sizes - Model capabilities -Last updated: 2025-08-06 \ No newline at end of file +Last updated: 2025-08-10 \ No newline at end of file diff --git a/resources/model-pricing/model_prices_and_context_window.json b/resources/model-pricing/model_prices_and_context_window.json index d7ef624a..04343ee0 100644 --- a/resources/model-pricing/model_prices_and_context_window.json +++ b/resources/model-pricing/model_prices_and_context_window.json @@ -3,9 +3,9 @@ "max_tokens": "LEGACY parameter. set to max_output_tokens if provider specifies it. IF not set to max_input_tokens, if provider specifies it.", "max_input_tokens": "max input tokens, if the provider specifies it. if not default to max_tokens", "max_output_tokens": "max output tokens, if the provider specifies it. 
if not default to max_tokens", - "input_cost_per_token": 0.0, - "output_cost_per_token": 0.0, - "output_cost_per_reasoning_token": 0.0, + "input_cost_per_token": 0, + "output_cost_per_token": 0, + "output_cost_per_reasoning_token": 0, "litellm_provider": "one of https://docs.litellm.ai/docs/providers", "mode": "one of: chat, embedding, completion, image_generation, audio_transcription, audio_speech, image_generation, moderation, rerank", "supports_function_calling": true, @@ -19,16 +19,16 @@ "supports_reasoning": true, "supports_web_search": true, "search_context_cost_per_query": { - "search_context_size_low": 0.0, - "search_context_size_medium": 0.0, - "search_context_size_high": 0.0 + "search_context_size_low": 0, + "search_context_size_medium": 0, + "search_context_size_high": 0 }, - "file_search_cost_per_1k_calls": 0.0, - "file_search_cost_per_gb_per_day": 0.0, - "vector_store_cost_per_gb_per_day": 0.0, - "computer_use_input_cost_per_1k_tokens": 0.0, - "computer_use_output_cost_per_1k_tokens": 0.0, - "code_interpreter_cost_per_session": 0.0, + "file_search_cost_per_1k_calls": 0, + "file_search_cost_per_gb_per_day": 0, + "vector_store_cost_per_gb_per_day": 0, + "computer_use_input_cost_per_1k_tokens": 0, + "computer_use_output_cost_per_1k_tokens": 0, + "code_interpreter_cost_per_session": 0, "supported_regions": [ "global", "us-west-2", @@ -42,8 +42,8 @@ "max_tokens": 32768, "max_input_tokens": 32768, "max_output_tokens": 0, - "input_cost_per_token": 0.0, - "output_cost_per_token": 0.0, + "input_cost_per_token": 0, + "output_cost_per_token": 0, "litellm_provider": "openai", "mode": "moderation" }, @@ -51,8 +51,8 @@ "max_tokens": 32768, "max_input_tokens": 32768, "max_output_tokens": 0, - "input_cost_per_token": 0.0, - "output_cost_per_token": 0.0, + "input_cost_per_token": 0, + "output_cost_per_token": 0, "litellm_provider": "openai", "mode": "moderation" }, @@ -60,8 +60,8 @@ "max_tokens": 32768, "max_input_tokens": 32768, "max_output_tokens": 0, - 
"input_cost_per_token": 0.0, - "output_cost_per_token": 0.0, + "input_cost_per_token": 0, + "output_cost_per_token": 0, "litellm_provider": "openai", "mode": "moderation" }, @@ -69,8 +69,8 @@ "max_tokens": 4096, "max_input_tokens": 8192, "max_output_tokens": 4096, - "input_cost_per_token": 3e-05, - "output_cost_per_token": 6e-05, + "input_cost_per_token": 0.00003, + "output_cost_per_token": 0.00006, "litellm_provider": "openai", "mode": "chat", "supports_function_calling": true, @@ -82,11 +82,11 @@ "max_tokens": 32768, "max_input_tokens": 1047576, "max_output_tokens": 32768, - "input_cost_per_token": 2e-06, - "output_cost_per_token": 8e-06, - "input_cost_per_token_batches": 1e-06, - "output_cost_per_token_batches": 4e-06, - "cache_read_input_token_cost": 5e-07, + "input_cost_per_token": 0.000002, + "output_cost_per_token": 0.000008, + "input_cost_per_token_batches": 0.000001, + "output_cost_per_token_batches": 0.000004, + "cache_read_input_token_cost": 5e-7, "litellm_provider": "openai", "mode": "chat", "supported_endpoints": [ @@ -115,11 +115,11 @@ "max_tokens": 32768, "max_input_tokens": 1047576, "max_output_tokens": 32768, - "input_cost_per_token": 2e-06, - "output_cost_per_token": 8e-06, - "input_cost_per_token_batches": 1e-06, - "output_cost_per_token_batches": 4e-06, - "cache_read_input_token_cost": 5e-07, + "input_cost_per_token": 0.000002, + "output_cost_per_token": 0.000008, + "input_cost_per_token_batches": 0.000001, + "output_cost_per_token_batches": 0.000004, + "cache_read_input_token_cost": 5e-7, "litellm_provider": "openai", "mode": "chat", "supported_endpoints": [ @@ -148,11 +148,11 @@ "max_tokens": 32768, "max_input_tokens": 1047576, "max_output_tokens": 32768, - "input_cost_per_token": 4e-07, - "output_cost_per_token": 1.6e-06, - "input_cost_per_token_batches": 2e-07, - "output_cost_per_token_batches": 8e-07, - "cache_read_input_token_cost": 1e-07, + "input_cost_per_token": 4e-7, + "output_cost_per_token": 0.0000016, + 
"input_cost_per_token_batches": 2e-7, + "output_cost_per_token_batches": 8e-7, + "cache_read_input_token_cost": 1e-7, "litellm_provider": "openai", "mode": "chat", "supported_endpoints": [ @@ -181,11 +181,11 @@ "max_tokens": 32768, "max_input_tokens": 1047576, "max_output_tokens": 32768, - "input_cost_per_token": 4e-07, - "output_cost_per_token": 1.6e-06, - "input_cost_per_token_batches": 2e-07, - "output_cost_per_token_batches": 8e-07, - "cache_read_input_token_cost": 1e-07, + "input_cost_per_token": 4e-7, + "output_cost_per_token": 0.0000016, + "input_cost_per_token_batches": 2e-7, + "output_cost_per_token_batches": 8e-7, + "cache_read_input_token_cost": 1e-7, "litellm_provider": "openai", "mode": "chat", "supported_endpoints": [ @@ -214,11 +214,11 @@ "max_tokens": 32768, "max_input_tokens": 1047576, "max_output_tokens": 32768, - "input_cost_per_token": 1e-07, - "output_cost_per_token": 4e-07, - "input_cost_per_token_batches": 5e-08, - "output_cost_per_token_batches": 2e-07, - "cache_read_input_token_cost": 2.5e-08, + "input_cost_per_token": 1e-7, + "output_cost_per_token": 4e-7, + "input_cost_per_token_batches": 5e-8, + "output_cost_per_token_batches": 2e-7, + "cache_read_input_token_cost": 2.5e-8, "litellm_provider": "openai", "mode": "chat", "supported_endpoints": [ @@ -247,11 +247,11 @@ "max_tokens": 32768, "max_input_tokens": 1047576, "max_output_tokens": 32768, - "input_cost_per_token": 1e-07, - "output_cost_per_token": 4e-07, - "input_cost_per_token_batches": 5e-08, - "output_cost_per_token_batches": 2e-07, - "cache_read_input_token_cost": 2.5e-08, + "input_cost_per_token": 1e-7, + "output_cost_per_token": 4e-7, + "input_cost_per_token_batches": 5e-8, + "output_cost_per_token_batches": 2e-7, + "cache_read_input_token_cost": 2.5e-8, "litellm_provider": "openai", "mode": "chat", "supported_endpoints": [ @@ -280,11 +280,11 @@ "max_tokens": 16384, "max_input_tokens": 128000, "max_output_tokens": 16384, - "input_cost_per_token": 2.5e-06, - 
"output_cost_per_token": 1e-05, - "input_cost_per_token_batches": 1.25e-06, - "output_cost_per_token_batches": 5e-06, - "cache_read_input_token_cost": 1.25e-06, + "input_cost_per_token": 0.0000025, + "output_cost_per_token": 0.00001, + "input_cost_per_token_batches": 0.00000125, + "output_cost_per_token_batches": 0.000005, + "cache_read_input_token_cost": 0.00000125, "litellm_provider": "openai", "mode": "chat", "supports_pdf_input": true, @@ -318,8 +318,8 @@ "max_tokens": 131072, "max_input_tokens": 131072, "max_output_tokens": 16384, - "input_cost_per_token": 3e-06, - "output_cost_per_token": 1e-05, + "input_cost_per_token": 0.000003, + "output_cost_per_token": 0.00001, "litellm_provider": "watsonx", "mode": "chat", "supports_function_calling": true, @@ -336,11 +336,11 @@ "max_tokens": 16384, "max_input_tokens": 128000, "max_output_tokens": 16384, - "input_cost_per_token": 2.5e-06, - "output_cost_per_token": 1e-05, - "input_cost_per_token_batches": 1.25e-06, - "output_cost_per_token_batches": 5e-06, - "cache_read_input_token_cost": 1.25e-06, + "input_cost_per_token": 0.0000025, + "output_cost_per_token": 0.00001, + "input_cost_per_token_batches": 0.00000125, + "output_cost_per_token_batches": 0.000005, + "cache_read_input_token_cost": 0.00000125, "litellm_provider": "openai", "mode": "chat", "supports_pdf_input": true, @@ -356,11 +356,11 @@ "max_tokens": 16384, "max_input_tokens": 128000, "max_output_tokens": 16384, - "input_cost_per_token": 2.5e-06, - "output_cost_per_token": 1e-05, - "input_cost_per_token_batches": 1.25e-06, - "output_cost_per_token_batches": 5e-06, - "cache_read_input_token_cost": 1.25e-06, + "input_cost_per_token": 0.0000025, + "output_cost_per_token": 0.00001, + "input_cost_per_token_batches": 0.00000125, + "output_cost_per_token_batches": 0.000005, + "cache_read_input_token_cost": 0.00000125, "litellm_provider": "openai", "mode": "chat", "supports_pdf_input": true, @@ -382,11 +382,11 @@ "max_tokens": 16384, "max_input_tokens": 128000, 
"max_output_tokens": 16384, - "input_cost_per_token": 7.5e-05, + "input_cost_per_token": 0.000075, "output_cost_per_token": 0.00015, - "input_cost_per_token_batches": 3.75e-05, - "output_cost_per_token_batches": 7.5e-05, - "cache_read_input_token_cost": 3.75e-05, + "input_cost_per_token_batches": 0.0000375, + "output_cost_per_token_batches": 0.000075, + "cache_read_input_token_cost": 0.0000375, "litellm_provider": "openai", "mode": "chat", "supports_pdf_input": true, @@ -402,11 +402,11 @@ "max_tokens": 16384, "max_input_tokens": 128000, "max_output_tokens": 16384, - "input_cost_per_token": 7.5e-05, + "input_cost_per_token": 0.000075, "output_cost_per_token": 0.00015, - "input_cost_per_token_batches": 3.75e-05, - "output_cost_per_token_batches": 7.5e-05, - "cache_read_input_token_cost": 3.75e-05, + "input_cost_per_token_batches": 0.0000375, + "output_cost_per_token_batches": 0.000075, + "cache_read_input_token_cost": 0.0000375, "litellm_provider": "openai", "mode": "chat", "supports_pdf_input": true, @@ -423,9 +423,9 @@ "max_tokens": 16384, "max_input_tokens": 128000, "max_output_tokens": 16384, - "input_cost_per_token": 2.5e-06, + "input_cost_per_token": 0.0000025, "input_cost_per_audio_token": 0.0001, - "output_cost_per_token": 1e-05, + "output_cost_per_token": 0.00001, "output_cost_per_audio_token": 0.0002, "litellm_provider": "openai", "mode": "chat", @@ -440,10 +440,10 @@ "max_tokens": 16384, "max_input_tokens": 128000, "max_output_tokens": 16384, - "input_cost_per_token": 2.5e-06, - "input_cost_per_audio_token": 4e-05, - "output_cost_per_token": 1e-05, - "output_cost_per_audio_token": 8e-05, + "input_cost_per_token": 0.0000025, + "input_cost_per_audio_token": 0.00004, + "output_cost_per_token": 0.00001, + "output_cost_per_audio_token": 0.00008, "litellm_provider": "openai", "mode": "chat", "supports_function_calling": true, @@ -457,9 +457,9 @@ "max_tokens": 16384, "max_input_tokens": 128000, "max_output_tokens": 16384, - "input_cost_per_token": 2.5e-06, + 
"input_cost_per_token": 0.0000025, "input_cost_per_audio_token": 0.0001, - "output_cost_per_token": 1e-05, + "output_cost_per_token": 0.00001, "output_cost_per_audio_token": 0.0002, "litellm_provider": "openai", "mode": "chat", @@ -474,10 +474,10 @@ "max_tokens": 16384, "max_input_tokens": 128000, "max_output_tokens": 16384, - "input_cost_per_token": 2.5e-06, - "input_cost_per_audio_token": 4e-05, - "output_cost_per_token": 1e-05, - "output_cost_per_audio_token": 8e-05, + "input_cost_per_token": 0.0000025, + "input_cost_per_audio_token": 0.00004, + "output_cost_per_token": 0.00001, + "output_cost_per_audio_token": 0.00008, "litellm_provider": "openai", "mode": "chat", "supports_function_calling": true, @@ -491,10 +491,10 @@ "max_tokens": 16384, "max_input_tokens": 128000, "max_output_tokens": 16384, - "input_cost_per_token": 1.5e-07, - "input_cost_per_audio_token": 1e-05, - "output_cost_per_token": 6e-07, - "output_cost_per_audio_token": 2e-05, + "input_cost_per_token": 1.5e-7, + "input_cost_per_audio_token": 0.00001, + "output_cost_per_token": 6e-7, + "output_cost_per_audio_token": 0.00002, "litellm_provider": "openai", "mode": "chat", "supports_function_calling": true, @@ -508,10 +508,10 @@ "max_tokens": 16384, "max_input_tokens": 128000, "max_output_tokens": 16384, - "input_cost_per_token": 1.5e-07, - "input_cost_per_audio_token": 1e-05, - "output_cost_per_token": 6e-07, - "output_cost_per_audio_token": 2e-05, + "input_cost_per_token": 1.5e-7, + "input_cost_per_audio_token": 0.00001, + "output_cost_per_token": 6e-7, + "output_cost_per_audio_token": 0.00002, "litellm_provider": "openai", "mode": "chat", "supports_function_calling": true, @@ -525,11 +525,11 @@ "max_tokens": 16384, "max_input_tokens": 128000, "max_output_tokens": 16384, - "input_cost_per_token": 1.5e-07, - "output_cost_per_token": 6e-07, - "input_cost_per_token_batches": 7.5e-08, - "output_cost_per_token_batches": 3e-07, - "cache_read_input_token_cost": 7.5e-08, + "input_cost_per_token": 1.5e-7, + 
"output_cost_per_token": 6e-7, + "input_cost_per_token_batches": 7.5e-8, + "output_cost_per_token_batches": 3e-7, + "cache_read_input_token_cost": 7.5e-8, "litellm_provider": "openai", "mode": "chat", "supports_pdf_input": true, @@ -545,11 +545,11 @@ "max_tokens": 16384, "max_input_tokens": 128000, "max_output_tokens": 16384, - "input_cost_per_token": 1.5e-07, - "output_cost_per_token": 6e-07, - "input_cost_per_token_batches": 7.5e-08, - "output_cost_per_token_batches": 3e-07, - "cache_read_input_token_cost": 7.5e-08, + "input_cost_per_token": 1.5e-7, + "output_cost_per_token": 6e-7, + "input_cost_per_token_batches": 7.5e-8, + "output_cost_per_token_batches": 3e-7, + "cache_read_input_token_cost": 7.5e-8, "litellm_provider": "openai", "mode": "chat", "supports_pdf_input": true, @@ -565,11 +565,11 @@ "max_tokens": 16384, "max_input_tokens": 128000, "max_output_tokens": 16384, - "input_cost_per_token": 1.5e-07, - "output_cost_per_token": 6e-07, - "input_cost_per_token_batches": 7.5e-08, - "output_cost_per_token_batches": 3e-07, - "cache_read_input_token_cost": 7.5e-08, + "input_cost_per_token": 1.5e-7, + "output_cost_per_token": 6e-7, + "input_cost_per_token_batches": 7.5e-8, + "output_cost_per_token_batches": 3e-7, + "cache_read_input_token_cost": 7.5e-8, "litellm_provider": "openai", "mode": "chat", "supports_pdf_input": true, @@ -591,11 +591,11 @@ "max_tokens": 16384, "max_input_tokens": 128000, "max_output_tokens": 16384, - "input_cost_per_token": 1.5e-07, - "output_cost_per_token": 6e-07, - "input_cost_per_token_batches": 7.5e-08, - "output_cost_per_token_batches": 3e-07, - "cache_read_input_token_cost": 7.5e-08, + "input_cost_per_token": 1.5e-7, + "output_cost_per_token": 6e-7, + "input_cost_per_token_batches": 7.5e-8, + "output_cost_per_token_batches": 3e-7, + "cache_read_input_token_cost": 7.5e-8, "litellm_provider": "openai", "mode": "chat", "supports_pdf_input": true, @@ -612,13 +612,269 @@ "search_context_size_high": 0.03 } }, + "gpt-5": { + "max_tokens": 
128000, + "max_input_tokens": 400000, + "max_output_tokens": 128000, + "input_cost_per_token": 0.00000125, + "output_cost_per_token": 0.00001, + "cache_read_input_token_cost": 1.25e-7, + "litellm_provider": "openai", + "mode": "chat", + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/batch", + "/v1/responses" + ], + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "text" + ], + "supports_pdf_input": true, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_response_schema": true, + "supports_vision": true, + "supports_prompt_caching": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_native_streaming": true, + "supports_reasoning": true + }, + "gpt-5-mini": { + "max_tokens": 128000, + "max_input_tokens": 400000, + "max_output_tokens": 128000, + "input_cost_per_token": 2.5e-7, + "output_cost_per_token": 0.000002, + "cache_read_input_token_cost": 2.5e-8, + "litellm_provider": "openai", + "mode": "chat", + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/batch", + "/v1/responses" + ], + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "text" + ], + "supports_pdf_input": true, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_response_schema": true, + "supports_vision": true, + "supports_prompt_caching": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_native_streaming": true, + "supports_reasoning": true + }, + "gpt-5-nano": { + "max_tokens": 128000, + "max_input_tokens": 400000, + "max_output_tokens": 128000, + "input_cost_per_token": 5e-8, + "output_cost_per_token": 4e-7, + "cache_read_input_token_cost": 5e-9, + "litellm_provider": "openai", + "mode": "chat", + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/batch", + "/v1/responses" + ], + "supported_modalities": [ + "text", + "image" + ], + 
"supported_output_modalities": [ + "text" + ], + "supports_pdf_input": true, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_response_schema": true, + "supports_vision": true, + "supports_prompt_caching": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_native_streaming": true, + "supports_reasoning": true + }, + "gpt-5-chat": { + "max_tokens": 128000, + "max_input_tokens": 400000, + "max_output_tokens": 128000, + "input_cost_per_token": 0.00000125, + "output_cost_per_token": 0.00001, + "cache_read_input_token_cost": 1.25e-7, + "litellm_provider": "openai", + "mode": "chat", + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/batch", + "/v1/responses" + ], + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "text" + ], + "supports_pdf_input": true, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_response_schema": true, + "supports_vision": true, + "supports_prompt_caching": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_native_streaming": true, + "supports_reasoning": true + }, + "gpt-5-chat-latest": { + "max_tokens": 128000, + "max_input_tokens": 400000, + "max_output_tokens": 128000, + "input_cost_per_token": 0.00000125, + "output_cost_per_token": 0.00001, + "cache_read_input_token_cost": 1.25e-7, + "litellm_provider": "openai", + "mode": "chat", + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/batch", + "/v1/responses" + ], + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "text" + ], + "supports_pdf_input": true, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_response_schema": true, + "supports_vision": true, + "supports_prompt_caching": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_native_streaming": true, + 
"supports_reasoning": true + }, + "gpt-5-2025-08-07": { + "max_tokens": 128000, + "max_input_tokens": 400000, + "max_output_tokens": 128000, + "input_cost_per_token": 0.00000125, + "output_cost_per_token": 0.00001, + "cache_read_input_token_cost": 1.25e-7, + "litellm_provider": "openai", + "mode": "chat", + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/batch", + "/v1/responses" + ], + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "text" + ], + "supports_pdf_input": true, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_response_schema": true, + "supports_vision": true, + "supports_prompt_caching": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_native_streaming": true, + "supports_reasoning": true + }, + "gpt-5-mini-2025-08-07": { + "max_tokens": 128000, + "max_input_tokens": 400000, + "max_output_tokens": 128000, + "input_cost_per_token": 2.5e-7, + "output_cost_per_token": 0.000002, + "cache_read_input_token_cost": 2.5e-8, + "litellm_provider": "openai", + "mode": "chat", + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/batch", + "/v1/responses" + ], + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "text" + ], + "supports_pdf_input": true, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_response_schema": true, + "supports_vision": true, + "supports_prompt_caching": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_native_streaming": true, + "supports_reasoning": true + }, + "gpt-5-nano-2025-08-07": { + "max_tokens": 128000, + "max_input_tokens": 400000, + "max_output_tokens": 128000, + "input_cost_per_token": 5e-8, + "output_cost_per_token": 4e-7, + "cache_read_input_token_cost": 5e-9, + "litellm_provider": "openai", + "mode": "chat", + "supported_endpoints": [ + "/v1/chat/completions", + 
"/v1/batch", + "/v1/responses" + ], + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "text" + ], + "supports_pdf_input": true, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_response_schema": true, + "supports_vision": true, + "supports_prompt_caching": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_native_streaming": true, + "supports_reasoning": true + }, "codex-mini-latest": { "max_tokens": 100000, "max_input_tokens": 200000, "max_output_tokens": 100000, - "input_cost_per_token": 1.5e-06, - "output_cost_per_token": 6e-06, - "cache_read_input_token_cost": 3.75e-07, + "input_cost_per_token": 0.0000015, + "output_cost_per_token": 0.000006, + "cache_read_input_token_cost": 3.75e-7, "litellm_provider": "openai", "mode": "responses", "supports_pdf_input": true, @@ -647,7 +903,7 @@ "max_output_tokens": 100000, "input_cost_per_token": 0.00015, "output_cost_per_token": 0.0006, - "input_cost_per_token_batches": 7.5e-05, + "input_cost_per_token_batches": 0.000075, "output_cost_per_token_batches": 0.0003, "litellm_provider": "openai", "mode": "responses", @@ -679,7 +935,7 @@ "max_output_tokens": 100000, "input_cost_per_token": 0.00015, "output_cost_per_token": 0.0006, - "input_cost_per_token_batches": 7.5e-05, + "input_cost_per_token_batches": 0.000075, "output_cost_per_token_batches": 0.0003, "litellm_provider": "openai", "mode": "responses", @@ -709,9 +965,9 @@ "max_tokens": 100000, "max_input_tokens": 200000, "max_output_tokens": 100000, - "input_cost_per_token": 1.5e-05, - "output_cost_per_token": 6e-05, - "cache_read_input_token_cost": 7.5e-06, + "input_cost_per_token": 0.000015, + "output_cost_per_token": 0.00006, + "cache_read_input_token_cost": 0.0000075, "litellm_provider": "openai", "mode": "chat", "supports_function_calling": true, @@ -728,9 +984,9 @@ "max_tokens": 65536, "max_input_tokens": 128000, "max_output_tokens": 65536, - 
"input_cost_per_token": 1.1e-06, - "output_cost_per_token": 4.4e-06, - "cache_read_input_token_cost": 5.5e-07, + "input_cost_per_token": 0.0000011, + "output_cost_per_token": 0.0000044, + "cache_read_input_token_cost": 5.5e-7, "litellm_provider": "openai", "mode": "chat", "supports_vision": true, @@ -741,8 +997,8 @@ "max_tokens": 1024, "max_input_tokens": 8192, "max_output_tokens": 1024, - "input_cost_per_token": 3e-06, - "output_cost_per_token": 1.2e-05, + "input_cost_per_token": 0.000003, + "output_cost_per_token": 0.000012, "litellm_provider": "azure", "mode": "chat", "supported_endpoints": [ @@ -768,11 +1024,11 @@ "max_tokens": 100000, "max_input_tokens": 200000, "max_output_tokens": 100000, - "input_cost_per_token": 1e-05, - "output_cost_per_token": 4e-05, - "input_cost_per_token_batches": 5e-06, - "output_cost_per_token_batches": 2e-05, - "cache_read_input_token_cost": 2.5e-06, + "input_cost_per_token": 0.00001, + "output_cost_per_token": 0.00004, + "input_cost_per_token_batches": 0.000005, + "output_cost_per_token_batches": 0.00002, + "cache_read_input_token_cost": 0.0000025, "litellm_provider": "openai", "mode": "responses", "supported_endpoints": [ @@ -801,11 +1057,11 @@ "max_tokens": 100000, "max_input_tokens": 200000, "max_output_tokens": 100000, - "input_cost_per_token": 1e-05, - "output_cost_per_token": 4e-05, - "input_cost_per_token_batches": 5e-06, - "output_cost_per_token_batches": 2e-05, - "cache_read_input_token_cost": 2.5e-06, + "input_cost_per_token": 0.00001, + "output_cost_per_token": 0.00004, + "input_cost_per_token_batches": 0.000005, + "output_cost_per_token_batches": 0.00002, + "cache_read_input_token_cost": 0.0000025, "litellm_provider": "openai", "mode": "responses", "supported_endpoints": [ @@ -834,10 +1090,10 @@ "max_tokens": 100000, "max_input_tokens": 200000, "max_output_tokens": 100000, - "input_cost_per_token": 2e-05, - "input_cost_per_token_batches": 1e-05, - "output_cost_per_token_batches": 4e-05, - "output_cost_per_token": 
8e-05, + "input_cost_per_token": 0.00002, + "input_cost_per_token_batches": 0.00001, + "output_cost_per_token_batches": 0.00004, + "output_cost_per_token": 0.00008, "litellm_provider": "openai", "mode": "responses", "supports_function_calling": true, @@ -864,10 +1120,10 @@ "max_tokens": 100000, "max_input_tokens": 200000, "max_output_tokens": 100000, - "input_cost_per_token": 2e-05, - "input_cost_per_token_batches": 1e-05, - "output_cost_per_token_batches": 4e-05, - "output_cost_per_token": 8e-05, + "input_cost_per_token": 0.00002, + "input_cost_per_token_batches": 0.00001, + "output_cost_per_token_batches": 0.00004, + "output_cost_per_token": 0.00008, "litellm_provider": "openai", "mode": "responses", "supports_function_calling": true, @@ -894,9 +1150,9 @@ "max_tokens": 100000, "max_input_tokens": 200000, "max_output_tokens": 100000, - "input_cost_per_token": 2e-06, - "output_cost_per_token": 8e-06, - "cache_read_input_token_cost": 5e-07, + "input_cost_per_token": 0.000002, + "output_cost_per_token": 0.000008, + "cache_read_input_token_cost": 5e-7, "litellm_provider": "openai", "mode": "chat", "supports_function_calling": true, @@ -925,9 +1181,9 @@ "max_tokens": 100000, "max_input_tokens": 200000, "max_output_tokens": 100000, - "input_cost_per_token": 2e-06, - "output_cost_per_token": 8e-06, - "cache_read_input_token_cost": 5e-07, + "input_cost_per_token": 0.000002, + "output_cost_per_token": 0.000008, + "cache_read_input_token_cost": 5e-7, "litellm_provider": "openai", "mode": "chat", "supports_function_calling": true, @@ -956,9 +1212,9 @@ "max_tokens": 100000, "max_input_tokens": 200000, "max_output_tokens": 100000, - "input_cost_per_token": 1.1e-06, - "output_cost_per_token": 4.4e-06, - "cache_read_input_token_cost": 5.5e-07, + "input_cost_per_token": 0.0000011, + "output_cost_per_token": 0.0000044, + "cache_read_input_token_cost": 5.5e-7, "litellm_provider": "openai", "mode": "chat", "supports_function_calling": true, @@ -973,9 +1229,9 @@ "max_tokens": 100000, 
"max_input_tokens": 200000, "max_output_tokens": 100000, - "input_cost_per_token": 1.1e-06, - "output_cost_per_token": 4.4e-06, - "cache_read_input_token_cost": 5.5e-07, + "input_cost_per_token": 0.0000011, + "output_cost_per_token": 0.0000044, + "cache_read_input_token_cost": 5.5e-7, "litellm_provider": "openai", "mode": "chat", "supports_function_calling": true, @@ -990,9 +1246,9 @@ "max_tokens": 100000, "max_input_tokens": 200000, "max_output_tokens": 100000, - "input_cost_per_token": 1.1e-06, - "output_cost_per_token": 4.4e-06, - "cache_read_input_token_cost": 2.75e-07, + "input_cost_per_token": 0.0000011, + "output_cost_per_token": 0.0000044, + "cache_read_input_token_cost": 2.75e-7, "litellm_provider": "openai", "mode": "chat", "supports_pdf_input": true, @@ -1008,11 +1264,11 @@ "max_tokens": 100000, "max_input_tokens": 200000, "max_output_tokens": 100000, - "input_cost_per_token": 2e-06, - "output_cost_per_token": 8e-06, - "input_cost_per_token_batches": 1e-06, - "output_cost_per_token_batches": 4e-06, - "cache_read_input_token_cost": 5e-07, + "input_cost_per_token": 0.000002, + "output_cost_per_token": 0.000008, + "input_cost_per_token_batches": 0.000001, + "output_cost_per_token_batches": 0.000004, + "cache_read_input_token_cost": 5e-7, "litellm_provider": "openai", "mode": "responses", "supported_endpoints": [ @@ -1041,11 +1297,11 @@ "max_tokens": 100000, "max_input_tokens": 200000, "max_output_tokens": 100000, - "input_cost_per_token": 2e-06, - "output_cost_per_token": 8e-06, - "input_cost_per_token_batches": 1e-06, - "output_cost_per_token_batches": 4e-06, - "cache_read_input_token_cost": 5e-07, + "input_cost_per_token": 0.000002, + "output_cost_per_token": 0.000008, + "input_cost_per_token_batches": 0.000001, + "output_cost_per_token_batches": 0.000004, + "cache_read_input_token_cost": 5e-7, "litellm_provider": "openai", "mode": "responses", "supported_endpoints": [ @@ -1074,9 +1330,9 @@ "max_tokens": 100000, "max_input_tokens": 200000, 
"max_output_tokens": 100000, - "input_cost_per_token": 1.1e-06, - "output_cost_per_token": 4.4e-06, - "cache_read_input_token_cost": 2.75e-07, + "input_cost_per_token": 0.0000011, + "output_cost_per_token": 0.0000044, + "cache_read_input_token_cost": 2.75e-7, "litellm_provider": "openai", "mode": "chat", "supports_pdf_input": true, @@ -1092,9 +1348,9 @@ "max_tokens": 65536, "max_input_tokens": 128000, "max_output_tokens": 65536, - "input_cost_per_token": 3e-06, - "output_cost_per_token": 1.2e-05, - "cache_read_input_token_cost": 1.5e-06, + "input_cost_per_token": 0.000003, + "output_cost_per_token": 0.000012, + "cache_read_input_token_cost": 0.0000015, "litellm_provider": "openai", "mode": "chat", "supports_pdf_input": true, @@ -1106,9 +1362,9 @@ "max_tokens": 32768, "max_input_tokens": 128000, "max_output_tokens": 32768, - "input_cost_per_token": 1.5e-05, - "output_cost_per_token": 6e-05, - "cache_read_input_token_cost": 7.5e-06, + "input_cost_per_token": 0.000015, + "output_cost_per_token": 0.00006, + "cache_read_input_token_cost": 0.0000075, "litellm_provider": "openai", "mode": "chat", "supports_pdf_input": true, @@ -1120,9 +1376,9 @@ "max_tokens": 32768, "max_input_tokens": 128000, "max_output_tokens": 32768, - "input_cost_per_token": 1.5e-05, - "output_cost_per_token": 6e-05, - "cache_read_input_token_cost": 7.5e-06, + "input_cost_per_token": 0.000015, + "output_cost_per_token": 0.00006, + "cache_read_input_token_cost": 0.0000075, "litellm_provider": "openai", "mode": "chat", "supports_pdf_input": true, @@ -1134,9 +1390,9 @@ "max_tokens": 100000, "max_input_tokens": 200000, "max_output_tokens": 100000, - "input_cost_per_token": 1.5e-05, - "output_cost_per_token": 6e-05, - "cache_read_input_token_cost": 7.5e-06, + "input_cost_per_token": 0.000015, + "output_cost_per_token": 0.00006, + "cache_read_input_token_cost": 0.0000075, "litellm_provider": "openai", "mode": "chat", "supports_pdf_input": true, @@ -1153,8 +1409,8 @@ "max_tokens": 4096, "max_input_tokens": 
128000, "max_output_tokens": 4096, - "input_cost_per_token": 5e-06, - "output_cost_per_token": 1.5e-05, + "input_cost_per_token": 0.000005, + "output_cost_per_token": 0.000015, "litellm_provider": "openai", "mode": "chat", "supports_pdf_input": true, @@ -1169,10 +1425,10 @@ "max_tokens": 4096, "max_input_tokens": 128000, "max_output_tokens": 4096, - "input_cost_per_token": 5e-06, - "output_cost_per_token": 1.5e-05, - "input_cost_per_token_batches": 2.5e-06, - "output_cost_per_token_batches": 7.5e-06, + "input_cost_per_token": 0.000005, + "output_cost_per_token": 0.000015, + "input_cost_per_token_batches": 0.0000025, + "output_cost_per_token_batches": 0.0000075, "litellm_provider": "openai", "mode": "chat", "supports_pdf_input": true, @@ -1187,11 +1443,11 @@ "max_tokens": 16384, "max_input_tokens": 128000, "max_output_tokens": 16384, - "input_cost_per_token": 2.5e-06, - "output_cost_per_token": 1e-05, - "input_cost_per_token_batches": 1.25e-06, - "output_cost_per_token_batches": 5e-06, - "cache_read_input_token_cost": 1.25e-06, + "input_cost_per_token": 0.0000025, + "output_cost_per_token": 0.00001, + "input_cost_per_token_batches": 0.00000125, + "output_cost_per_token_batches": 0.000005, + "cache_read_input_token_cost": 0.00000125, "litellm_provider": "openai", "mode": "chat", "supports_pdf_input": true, @@ -1207,11 +1463,11 @@ "max_tokens": 16384, "max_input_tokens": 128000, "max_output_tokens": 16384, - "input_cost_per_token": 2.5e-06, - "output_cost_per_token": 1e-05, - "input_cost_per_token_batches": 1.25e-06, - "output_cost_per_token_batches": 5e-06, - "cache_read_input_token_cost": 1.25e-06, + "input_cost_per_token": 0.0000025, + "output_cost_per_token": 0.00001, + "input_cost_per_token_batches": 0.00000125, + "output_cost_per_token_batches": 0.000005, + "cache_read_input_token_cost": 0.00000125, "litellm_provider": "openai", "mode": "chat", "supports_pdf_input": true, @@ -1227,11 +1483,11 @@ "max_tokens": 4096, "max_input_tokens": 128000, 
"max_output_tokens": 4096, - "input_cost_per_token": 5e-06, + "input_cost_per_token": 0.000005, "input_cost_per_audio_token": 0.0001, - "cache_read_input_token_cost": 2.5e-06, - "cache_creation_input_audio_token_cost": 2e-05, - "output_cost_per_token": 2e-05, + "cache_read_input_token_cost": 0.0000025, + "cache_creation_input_audio_token_cost": 0.00002, + "output_cost_per_token": 0.00002, "output_cost_per_audio_token": 0.0002, "litellm_provider": "openai", "mode": "chat", @@ -1246,11 +1502,11 @@ "max_tokens": 4096, "max_input_tokens": 128000, "max_output_tokens": 4096, - "input_cost_per_token": 5e-06, - "input_cost_per_audio_token": 4e-05, - "cache_read_input_token_cost": 2.5e-06, - "output_cost_per_token": 2e-05, - "output_cost_per_audio_token": 8e-05, + "input_cost_per_token": 0.000005, + "input_cost_per_audio_token": 0.00004, + "cache_read_input_token_cost": 0.0000025, + "output_cost_per_token": 0.00002, + "output_cost_per_audio_token": 0.00008, "litellm_provider": "openai", "mode": "chat", "supports_function_calling": true, @@ -1264,11 +1520,11 @@ "max_tokens": 4096, "max_input_tokens": 128000, "max_output_tokens": 4096, - "input_cost_per_token": 5e-06, - "input_cost_per_audio_token": 4e-05, - "cache_read_input_token_cost": 2.5e-06, - "output_cost_per_token": 2e-05, - "output_cost_per_audio_token": 8e-05, + "input_cost_per_token": 0.000005, + "input_cost_per_audio_token": 0.00004, + "cache_read_input_token_cost": 0.0000025, + "output_cost_per_token": 0.00002, + "output_cost_per_audio_token": 0.00008, "litellm_provider": "openai", "mode": "chat", "supports_function_calling": true, @@ -1282,11 +1538,11 @@ "max_tokens": 4096, "max_input_tokens": 128000, "max_output_tokens": 4096, - "input_cost_per_token": 5e-06, - "input_cost_per_audio_token": 4e-05, - "cache_read_input_token_cost": 2.5e-06, - "output_cost_per_token": 2e-05, - "output_cost_per_audio_token": 8e-05, + "input_cost_per_token": 0.000005, + "input_cost_per_audio_token": 0.00004, + 
"cache_read_input_token_cost": 0.0000025, + "output_cost_per_token": 0.00002, + "output_cost_per_audio_token": 0.00008, "litellm_provider": "openai", "mode": "chat", "supports_function_calling": true, @@ -1300,12 +1556,12 @@ "max_tokens": 4096, "max_input_tokens": 128000, "max_output_tokens": 4096, - "input_cost_per_token": 6e-07, - "input_cost_per_audio_token": 1e-05, - "cache_read_input_token_cost": 3e-07, - "cache_creation_input_audio_token_cost": 3e-07, - "output_cost_per_token": 2.4e-06, - "output_cost_per_audio_token": 2e-05, + "input_cost_per_token": 6e-7, + "input_cost_per_audio_token": 0.00001, + "cache_read_input_token_cost": 3e-7, + "cache_creation_input_audio_token_cost": 3e-7, + "output_cost_per_token": 0.0000024, + "output_cost_per_audio_token": 0.00002, "litellm_provider": "openai", "mode": "chat", "supports_function_calling": true, @@ -1319,12 +1575,12 @@ "max_tokens": 4096, "max_input_tokens": 128000, "max_output_tokens": 4096, - "input_cost_per_token": 6e-07, - "input_cost_per_audio_token": 1e-05, - "cache_read_input_token_cost": 3e-07, - "cache_creation_input_audio_token_cost": 3e-07, - "output_cost_per_token": 2.4e-06, - "output_cost_per_audio_token": 2e-05, + "input_cost_per_token": 6e-7, + "input_cost_per_audio_token": 0.00001, + "cache_read_input_token_cost": 3e-7, + "cache_creation_input_audio_token_cost": 3e-7, + "output_cost_per_token": 0.0000024, + "output_cost_per_audio_token": 0.00002, "litellm_provider": "openai", "mode": "chat", "supports_function_calling": true, @@ -1338,8 +1594,8 @@ "max_tokens": 4096, "max_input_tokens": 128000, "max_output_tokens": 4096, - "input_cost_per_token": 1e-05, - "output_cost_per_token": 3e-05, + "input_cost_per_token": 0.00001, + "output_cost_per_token": 0.00003, "litellm_provider": "openai", "mode": "chat", "supports_pdf_input": true, @@ -1353,8 +1609,8 @@ "max_tokens": 4096, "max_input_tokens": 8192, "max_output_tokens": 4096, - "input_cost_per_token": 3e-05, - "output_cost_per_token": 6e-05, + 
"input_cost_per_token": 0.00003, + "output_cost_per_token": 0.00006, "litellm_provider": "openai", "mode": "chat", "supports_prompt_caching": true, @@ -1365,8 +1621,8 @@ "max_tokens": 4096, "max_input_tokens": 8192, "max_output_tokens": 4096, - "input_cost_per_token": 3e-05, - "output_cost_per_token": 6e-05, + "input_cost_per_token": 0.00003, + "output_cost_per_token": 0.00006, "litellm_provider": "openai", "mode": "chat", "supports_function_calling": true, @@ -1379,7 +1635,7 @@ "max_tokens": 4096, "max_input_tokens": 32768, "max_output_tokens": 4096, - "input_cost_per_token": 6e-05, + "input_cost_per_token": 0.00006, "output_cost_per_token": 0.00012, "litellm_provider": "openai", "mode": "chat", @@ -1391,7 +1647,7 @@ "max_tokens": 4096, "max_input_tokens": 32768, "max_output_tokens": 4096, - "input_cost_per_token": 6e-05, + "input_cost_per_token": 0.00006, "output_cost_per_token": 0.00012, "litellm_provider": "openai", "mode": "chat", @@ -1403,7 +1659,7 @@ "max_tokens": 4096, "max_input_tokens": 32768, "max_output_tokens": 4096, - "input_cost_per_token": 6e-05, + "input_cost_per_token": 0.00006, "output_cost_per_token": 0.00012, "litellm_provider": "openai", "mode": "chat", @@ -1415,8 +1671,8 @@ "max_tokens": 4096, "max_input_tokens": 128000, "max_output_tokens": 4096, - "input_cost_per_token": 1e-05, - "output_cost_per_token": 3e-05, + "input_cost_per_token": 0.00001, + "output_cost_per_token": 0.00003, "litellm_provider": "openai", "mode": "chat", "supports_pdf_input": true, @@ -1431,8 +1687,8 @@ "max_tokens": 4096, "max_input_tokens": 128000, "max_output_tokens": 4096, - "input_cost_per_token": 1e-05, - "output_cost_per_token": 3e-05, + "input_cost_per_token": 0.00001, + "output_cost_per_token": 0.00003, "litellm_provider": "openai", "mode": "chat", "supports_pdf_input": true, @@ -1447,8 +1703,8 @@ "max_tokens": 4096, "max_input_tokens": 128000, "max_output_tokens": 4096, - "input_cost_per_token": 1e-05, - "output_cost_per_token": 3e-05, + 
"input_cost_per_token": 0.00001, + "output_cost_per_token": 0.00003, "litellm_provider": "openai", "mode": "chat", "supports_function_calling": true, @@ -1461,8 +1717,8 @@ "max_tokens": 4096, "max_input_tokens": 128000, "max_output_tokens": 4096, - "input_cost_per_token": 1e-05, - "output_cost_per_token": 3e-05, + "input_cost_per_token": 0.00001, + "output_cost_per_token": 0.00003, "litellm_provider": "openai", "mode": "chat", "supports_function_calling": true, @@ -1475,8 +1731,8 @@ "max_tokens": 4096, "max_input_tokens": 128000, "max_output_tokens": 4096, - "input_cost_per_token": 1e-05, - "output_cost_per_token": 3e-05, + "input_cost_per_token": 0.00001, + "output_cost_per_token": 0.00003, "litellm_provider": "openai", "mode": "chat", "supports_vision": true, @@ -1490,8 +1746,8 @@ "max_tokens": 4096, "max_input_tokens": 128000, "max_output_tokens": 4096, - "input_cost_per_token": 1e-05, - "output_cost_per_token": 3e-05, + "input_cost_per_token": 0.00001, + "output_cost_per_token": 0.00003, "litellm_provider": "openai", "mode": "chat", "supports_vision": true, @@ -1505,8 +1761,8 @@ "max_tokens": 4097, "max_input_tokens": 16385, "max_output_tokens": 4096, - "input_cost_per_token": 1.5e-06, - "output_cost_per_token": 2e-06, + "input_cost_per_token": 0.0000015, + "output_cost_per_token": 0.000002, "litellm_provider": "openai", "mode": "chat", "supports_function_calling": true, @@ -1518,8 +1774,8 @@ "max_tokens": 4097, "max_input_tokens": 4097, "max_output_tokens": 4096, - "input_cost_per_token": 1.5e-06, - "output_cost_per_token": 2e-06, + "input_cost_per_token": 0.0000015, + "output_cost_per_token": 0.000002, "litellm_provider": "openai", "mode": "chat", "supports_prompt_caching": true, @@ -1530,8 +1786,8 @@ "max_tokens": 4097, "max_input_tokens": 4097, "max_output_tokens": 4096, - "input_cost_per_token": 1.5e-06, - "output_cost_per_token": 2e-06, + "input_cost_per_token": 0.0000015, + "output_cost_per_token": 0.000002, "litellm_provider": "openai", "mode": "chat", 
"supports_function_calling": true, @@ -1543,8 +1799,8 @@ "max_tokens": 16385, "max_input_tokens": 16385, "max_output_tokens": 4096, - "input_cost_per_token": 1e-06, - "output_cost_per_token": 2e-06, + "input_cost_per_token": 0.000001, + "output_cost_per_token": 0.000002, "litellm_provider": "openai", "mode": "chat", "supports_function_calling": true, @@ -1557,8 +1813,8 @@ "max_tokens": 16385, "max_input_tokens": 16385, "max_output_tokens": 4096, - "input_cost_per_token": 5e-07, - "output_cost_per_token": 1.5e-06, + "input_cost_per_token": 5e-7, + "output_cost_per_token": 0.0000015, "litellm_provider": "openai", "mode": "chat", "supports_function_calling": true, @@ -1571,8 +1827,8 @@ "max_tokens": 16385, "max_input_tokens": 16385, "max_output_tokens": 4096, - "input_cost_per_token": 3e-06, - "output_cost_per_token": 4e-06, + "input_cost_per_token": 0.000003, + "output_cost_per_token": 0.000004, "litellm_provider": "openai", "mode": "chat", "supports_prompt_caching": true, @@ -1583,8 +1839,8 @@ "max_tokens": 16385, "max_input_tokens": 16385, "max_output_tokens": 4096, - "input_cost_per_token": 3e-06, - "output_cost_per_token": 4e-06, + "input_cost_per_token": 0.000003, + "output_cost_per_token": 0.000004, "litellm_provider": "openai", "mode": "chat", "supports_prompt_caching": true, @@ -1595,10 +1851,10 @@ "max_tokens": 4096, "max_input_tokens": 16385, "max_output_tokens": 4096, - "input_cost_per_token": 3e-06, - "output_cost_per_token": 6e-06, - "input_cost_per_token_batches": 1.5e-06, - "output_cost_per_token_batches": 3e-06, + "input_cost_per_token": 0.000003, + "output_cost_per_token": 0.000006, + "input_cost_per_token_batches": 0.0000015, + "output_cost_per_token_batches": 0.000003, "litellm_provider": "openai", "mode": "chat", "supports_system_messages": true, @@ -1608,8 +1864,8 @@ "max_tokens": 4096, "max_input_tokens": 16385, "max_output_tokens": 4096, - "input_cost_per_token": 3e-06, - "output_cost_per_token": 6e-06, + "input_cost_per_token": 0.000003, + 
"output_cost_per_token": 0.000006, "litellm_provider": "openai", "mode": "chat", "supports_system_messages": true, @@ -1619,8 +1875,8 @@ "max_tokens": 4096, "max_input_tokens": 16385, "max_output_tokens": 4096, - "input_cost_per_token": 3e-06, - "output_cost_per_token": 6e-06, + "input_cost_per_token": 0.000003, + "output_cost_per_token": 0.000006, "litellm_provider": "openai", "mode": "chat", "supports_system_messages": true, @@ -1630,8 +1886,8 @@ "max_tokens": 4096, "max_input_tokens": 4096, "max_output_tokens": 4096, - "input_cost_per_token": 3e-06, - "output_cost_per_token": 6e-06, + "input_cost_per_token": 0.000003, + "output_cost_per_token": 0.000006, "litellm_provider": "openai", "mode": "chat", "supports_system_messages": true, @@ -1641,8 +1897,8 @@ "max_tokens": 4096, "max_input_tokens": 8192, "max_output_tokens": 4096, - "input_cost_per_token": 3e-05, - "output_cost_per_token": 6e-05, + "input_cost_per_token": 0.00003, + "output_cost_per_token": 0.00006, "litellm_provider": "openai", "mode": "chat", "supports_function_calling": true, @@ -1654,10 +1910,10 @@ "max_tokens": 16384, "max_input_tokens": 128000, "max_output_tokens": 16384, - "input_cost_per_token": 3.75e-06, - "output_cost_per_token": 1.5e-05, - "input_cost_per_token_batches": 1.875e-06, - "output_cost_per_token_batches": 7.5e-06, + "input_cost_per_token": 0.00000375, + "output_cost_per_token": 0.000015, + "input_cost_per_token_batches": 0.000001875, + "output_cost_per_token_batches": 0.0000075, "litellm_provider": "openai", "mode": "chat", "supports_pdf_input": true, @@ -1672,9 +1928,9 @@ "max_tokens": 16384, "max_input_tokens": 128000, "max_output_tokens": 16384, - "input_cost_per_token": 3.75e-06, - "cache_creation_input_token_cost": 1.875e-06, - "output_cost_per_token": 1.5e-05, + "input_cost_per_token": 0.00000375, + "cache_creation_input_token_cost": 0.000001875, + "output_cost_per_token": 0.000015, "litellm_provider": "openai", "mode": "chat", "supports_pdf_input": true, @@ -1690,11 
+1946,11 @@ "max_tokens": 16384, "max_input_tokens": 128000, "max_output_tokens": 16384, - "input_cost_per_token": 3e-07, - "output_cost_per_token": 1.2e-06, - "input_cost_per_token_batches": 1.5e-07, - "output_cost_per_token_batches": 6e-07, - "cache_read_input_token_cost": 1.5e-07, + "input_cost_per_token": 3e-7, + "output_cost_per_token": 0.0000012, + "input_cost_per_token_batches": 1.5e-7, + "output_cost_per_token_batches": 6e-7, + "cache_read_input_token_cost": 1.5e-7, "litellm_provider": "openai", "mode": "chat", "supports_function_calling": true, @@ -1710,10 +1966,10 @@ "max_tokens": 16384, "max_input_tokens": 16384, "max_output_tokens": 4096, - "input_cost_per_token": 2e-06, - "output_cost_per_token": 2e-06, - "input_cost_per_token_batches": 1e-06, - "output_cost_per_token_batches": 1e-06, + "input_cost_per_token": 0.000002, + "output_cost_per_token": 0.000002, + "input_cost_per_token_batches": 0.000001, + "output_cost_per_token_batches": 0.000001, "litellm_provider": "text-completion-openai", "mode": "completion" }, @@ -1721,10 +1977,10 @@ "max_tokens": 16384, "max_input_tokens": 16384, "max_output_tokens": 4096, - "input_cost_per_token": 4e-07, - "output_cost_per_token": 4e-07, - "input_cost_per_token_batches": 2e-07, - "output_cost_per_token_batches": 2e-07, + "input_cost_per_token": 4e-7, + "output_cost_per_token": 4e-7, + "input_cost_per_token_batches": 2e-7, + "output_cost_per_token_batches": 2e-7, "litellm_provider": "text-completion-openai", "mode": "completion" }, @@ -1732,10 +1988,10 @@ "max_tokens": 8191, "max_input_tokens": 8191, "output_vector_size": 3072, - "input_cost_per_token": 1.3e-07, - "output_cost_per_token": 0.0, - "input_cost_per_token_batches": 6.5e-08, - "output_cost_per_token_batches": 0.0, + "input_cost_per_token": 1.3e-7, + "output_cost_per_token": 0, + "input_cost_per_token_batches": 6.5e-8, + "output_cost_per_token_batches": 0, "litellm_provider": "openai", "mode": "embedding" }, @@ -1743,10 +1999,10 @@ "max_tokens": 8191, 
"max_input_tokens": 8191, "output_vector_size": 1536, - "input_cost_per_token": 2e-08, - "output_cost_per_token": 0.0, - "input_cost_per_token_batches": 1e-08, - "output_cost_per_token_batches": 0.0, + "input_cost_per_token": 2e-8, + "output_cost_per_token": 0, + "input_cost_per_token_batches": 1e-8, + "output_cost_per_token_batches": 0, "litellm_provider": "openai", "mode": "embedding" }, @@ -1754,18 +2010,18 @@ "max_tokens": 8191, "max_input_tokens": 8191, "output_vector_size": 1536, - "input_cost_per_token": 1e-07, - "output_cost_per_token": 0.0, + "input_cost_per_token": 1e-7, + "output_cost_per_token": 0, "litellm_provider": "openai", "mode": "embedding" }, "text-embedding-ada-002-v2": { "max_tokens": 8191, "max_input_tokens": 8191, - "input_cost_per_token": 1e-07, - "output_cost_per_token": 0.0, - "input_cost_per_token_batches": 5e-08, - "output_cost_per_token_batches": 0.0, + "input_cost_per_token": 1e-7, + "output_cost_per_token": 0, + "input_cost_per_token_batches": 5e-8, + "output_cost_per_token_batches": 0, "litellm_provider": "openai", "mode": "embedding" }, @@ -1773,8 +2029,8 @@ "max_tokens": 32768, "max_input_tokens": 32768, "max_output_tokens": 0, - "input_cost_per_token": 0.0, - "output_cost_per_token": 0.0, + "input_cost_per_token": 0, + "output_cost_per_token": 0, "litellm_provider": "openai", "mode": "moderation" }, @@ -1782,8 +2038,8 @@ "max_tokens": 32768, "max_input_tokens": 32768, "max_output_tokens": 0, - "input_cost_per_token": 0.0, - "output_cost_per_token": 0.0, + "input_cost_per_token": 0, + "output_cost_per_token": 0, "litellm_provider": "openai", "mode": "moderation" }, @@ -1791,69 +2047,69 @@ "max_tokens": 32768, "max_input_tokens": 32768, "max_output_tokens": 0, - "input_cost_per_token": 0.0, - "output_cost_per_token": 0.0, + "input_cost_per_token": 0, + "output_cost_per_token": 0, "litellm_provider": "openai", "mode": "moderation" }, "256-x-256/dall-e-2": { "mode": "image_generation", - "input_cost_per_pixel": 2.4414e-07, - 
"output_cost_per_pixel": 0.0, + "input_cost_per_pixel": 2.4414e-7, + "output_cost_per_pixel": 0, "litellm_provider": "openai" }, "512-x-512/dall-e-2": { "mode": "image_generation", - "input_cost_per_pixel": 6.86e-08, - "output_cost_per_pixel": 0.0, + "input_cost_per_pixel": 6.86e-8, + "output_cost_per_pixel": 0, "litellm_provider": "openai" }, "1024-x-1024/dall-e-2": { "mode": "image_generation", - "input_cost_per_pixel": 1.9e-08, - "output_cost_per_pixel": 0.0, + "input_cost_per_pixel": 1.9e-8, + "output_cost_per_pixel": 0, "litellm_provider": "openai" }, "hd/1024-x-1792/dall-e-3": { "mode": "image_generation", - "input_cost_per_pixel": 6.539e-08, - "output_cost_per_pixel": 0.0, + "input_cost_per_pixel": 6.539e-8, + "output_cost_per_pixel": 0, "litellm_provider": "openai" }, "hd/1792-x-1024/dall-e-3": { "mode": "image_generation", - "input_cost_per_pixel": 6.539e-08, - "output_cost_per_pixel": 0.0, + "input_cost_per_pixel": 6.539e-8, + "output_cost_per_pixel": 0, "litellm_provider": "openai" }, "hd/1024-x-1024/dall-e-3": { "mode": "image_generation", - "input_cost_per_pixel": 7.629e-08, - "output_cost_per_pixel": 0.0, + "input_cost_per_pixel": 7.629e-8, + "output_cost_per_pixel": 0, "litellm_provider": "openai" }, "standard/1024-x-1792/dall-e-3": { "mode": "image_generation", - "input_cost_per_pixel": 4.359e-08, - "output_cost_per_pixel": 0.0, + "input_cost_per_pixel": 4.359e-8, + "output_cost_per_pixel": 0, "litellm_provider": "openai" }, "standard/1792-x-1024/dall-e-3": { "mode": "image_generation", - "input_cost_per_pixel": 4.359e-08, - "output_cost_per_pixel": 0.0, + "input_cost_per_pixel": 4.359e-8, + "output_cost_per_pixel": 0, "litellm_provider": "openai" }, "standard/1024-x-1024/dall-e-3": { "mode": "image_generation", - "input_cost_per_pixel": 3.81469e-08, - "output_cost_per_pixel": 0.0, + "input_cost_per_pixel": 3.81469e-8, + "output_cost_per_pixel": 0, "litellm_provider": "openai" }, "gpt-image-1": { "mode": "image_generation", - "input_cost_per_pixel": 
4.0054321e-08, - "output_cost_per_pixel": 0.0, + "input_cost_per_pixel": 4.0054321e-8, + "output_cost_per_pixel": 0, "litellm_provider": "openai", "supported_endpoints": [ "/v1/images/generations" @@ -1861,8 +2117,8 @@ }, "low/1024-x-1024/gpt-image-1": { "mode": "image_generation", - "input_cost_per_pixel": 1.0490417e-08, - "output_cost_per_pixel": 0.0, + "input_cost_per_pixel": 1.0490417e-8, + "output_cost_per_pixel": 0, "litellm_provider": "openai", "supported_endpoints": [ "/v1/images/generations" @@ -1870,8 +2126,8 @@ }, "medium/1024-x-1024/gpt-image-1": { "mode": "image_generation", - "input_cost_per_pixel": 4.0054321e-08, - "output_cost_per_pixel": 0.0, + "input_cost_per_pixel": 4.0054321e-8, + "output_cost_per_pixel": 0, "litellm_provider": "openai", "supported_endpoints": [ "/v1/images/generations" @@ -1879,8 +2135,8 @@ }, "high/1024-x-1024/gpt-image-1": { "mode": "image_generation", - "input_cost_per_pixel": 1.59263611e-07, - "output_cost_per_pixel": 0.0, + "input_cost_per_pixel": 1.59263611e-7, + "output_cost_per_pixel": 0, "litellm_provider": "openai", "supported_endpoints": [ "/v1/images/generations" @@ -1888,8 +2144,8 @@ }, "low/1024-x-1536/gpt-image-1": { "mode": "image_generation", - "input_cost_per_pixel": 1.0172526e-08, - "output_cost_per_pixel": 0.0, + "input_cost_per_pixel": 1.0172526e-8, + "output_cost_per_pixel": 0, "litellm_provider": "openai", "supported_endpoints": [ "/v1/images/generations" @@ -1897,8 +2153,8 @@ }, "medium/1024-x-1536/gpt-image-1": { "mode": "image_generation", - "input_cost_per_pixel": 4.0054321e-08, - "output_cost_per_pixel": 0.0, + "input_cost_per_pixel": 4.0054321e-8, + "output_cost_per_pixel": 0, "litellm_provider": "openai", "supported_endpoints": [ "/v1/images/generations" @@ -1906,8 +2162,8 @@ }, "high/1024-x-1536/gpt-image-1": { "mode": "image_generation", - "input_cost_per_pixel": 1.58945719e-07, - "output_cost_per_pixel": 0.0, + "input_cost_per_pixel": 1.58945719e-7, + "output_cost_per_pixel": 0, 
"litellm_provider": "openai", "supported_endpoints": [ "/v1/images/generations" @@ -1915,8 +2171,8 @@ }, "low/1536-x-1024/gpt-image-1": { "mode": "image_generation", - "input_cost_per_pixel": 1.0172526e-08, - "output_cost_per_pixel": 0.0, + "input_cost_per_pixel": 1.0172526e-8, + "output_cost_per_pixel": 0, "litellm_provider": "openai", "supported_endpoints": [ "/v1/images/generations" @@ -1924,8 +2180,8 @@ }, "medium/1536-x-1024/gpt-image-1": { "mode": "image_generation", - "input_cost_per_pixel": 4.0054321e-08, - "output_cost_per_pixel": 0.0, + "input_cost_per_pixel": 4.0054321e-8, + "output_cost_per_pixel": 0, "litellm_provider": "openai", "supported_endpoints": [ "/v1/images/generations" @@ -1933,8 +2189,8 @@ }, "high/1536-x-1024/gpt-image-1": { "mode": "image_generation", - "input_cost_per_pixel": 1.58945719e-07, - "output_cost_per_pixel": 0.0, + "input_cost_per_pixel": 1.58945719e-7, + "output_cost_per_pixel": 0, "litellm_provider": "openai", "supported_endpoints": [ "/v1/images/generations" @@ -1944,9 +2200,9 @@ "mode": "audio_transcription", "max_input_tokens": 16000, "max_output_tokens": 2000, - "input_cost_per_token": 2.5e-06, - "input_cost_per_audio_token": 6e-06, - "output_cost_per_token": 1e-05, + "input_cost_per_token": 0.0000025, + "input_cost_per_audio_token": 0.000006, + "output_cost_per_token": 0.00001, "litellm_provider": "openai", "supported_endpoints": [ "/v1/audio/transcriptions" @@ -1956,9 +2212,9 @@ "mode": "audio_transcription", "max_input_tokens": 16000, "max_output_tokens": 2000, - "input_cost_per_token": 1.25e-06, - "input_cost_per_audio_token": 3e-06, - "output_cost_per_token": 5e-06, + "input_cost_per_token": 0.00000125, + "input_cost_per_audio_token": 0.000003, + "output_cost_per_token": 0.000005, "litellm_provider": "openai", "supported_endpoints": [ "/v1/audio/transcriptions" @@ -1975,7 +2231,7 @@ }, "tts-1": { "mode": "audio_speech", - "input_cost_per_character": 1.5e-05, + "input_cost_per_character": 0.000015, "litellm_provider": 
"openai", "supported_endpoints": [ "/v1/audio/speech" @@ -1983,7 +2239,7 @@ }, "tts-1-hd": { "mode": "audio_speech", - "input_cost_per_character": 3e-05, + "input_cost_per_character": 0.00003, "litellm_provider": "openai", "supported_endpoints": [ "/v1/audio/speech" @@ -1991,9 +2247,9 @@ }, "gpt-4o-mini-tts": { "mode": "audio_speech", - "input_cost_per_token": 2.5e-06, - "output_cost_per_token": 1e-05, - "output_cost_per_audio_token": 1.2e-05, + "input_cost_per_token": 0.0000025, + "output_cost_per_token": 0.00001, + "output_cost_per_audio_token": 0.000012, "output_cost_per_second": 0.00025, "litellm_provider": "openai", "supported_modalities": [ @@ -2007,11 +2263,268 @@ "/v1/audio/speech" ] }, + "azure/gpt-5": { + "max_tokens": 128000, + "max_input_tokens": 272000, + "max_output_tokens": 128000, + "input_cost_per_token": 0.00000125, + "output_cost_per_token": 0.00001, + "cache_read_input_token_cost": 1.25e-7, + "litellm_provider": "azure", + "mode": "chat", + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/batch", + "/v1/responses" + ], + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "text" + ], + "supports_pdf_input": true, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_response_schema": true, + "supports_vision": true, + "supports_prompt_caching": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_native_streaming": true, + "supports_reasoning": true + }, + "azure/gpt-5-2025-08-07": { + "max_tokens": 128000, + "max_input_tokens": 272000, + "max_output_tokens": 128000, + "input_cost_per_token": 0.00000125, + "output_cost_per_token": 0.00001, + "cache_read_input_token_cost": 1.25e-7, + "litellm_provider": "azure", + "mode": "chat", + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/batch", + "/v1/responses" + ], + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "text" + ], + 
"supports_pdf_input": true, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_response_schema": true, + "supports_vision": true, + "supports_prompt_caching": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_native_streaming": true, + "supports_reasoning": true + }, + "azure/gpt-5-mini": { + "max_tokens": 128000, + "max_input_tokens": 272000, + "max_output_tokens": 128000, + "input_cost_per_token": 2.5e-7, + "output_cost_per_token": 0.000002, + "cache_read_input_token_cost": 2.5e-8, + "litellm_provider": "azure", + "mode": "chat", + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/batch", + "/v1/responses" + ], + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "text" + ], + "supports_pdf_input": true, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_response_schema": true, + "supports_vision": true, + "supports_prompt_caching": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_native_streaming": true, + "supports_reasoning": true + }, + "azure/gpt-5-mini-2025-08-07": { + "max_tokens": 128000, + "max_input_tokens": 272000, + "max_output_tokens": 128000, + "input_cost_per_token": 2.5e-7, + "output_cost_per_token": 0.000002, + "cache_read_input_token_cost": 2.5e-8, + "litellm_provider": "azure", + "mode": "chat", + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/batch", + "/v1/responses" + ], + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "text" + ], + "supports_pdf_input": true, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_response_schema": true, + "supports_vision": true, + "supports_prompt_caching": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_native_streaming": true, + "supports_reasoning": true + }, + 
"azure/gpt-5-nano-2025-08-07": { + "max_tokens": 128000, + "max_input_tokens": 272000, + "max_output_tokens": 128000, + "input_cost_per_token": 5e-8, + "output_cost_per_token": 4e-7, + "cache_read_input_token_cost": 5e-9, + "litellm_provider": "azure", + "mode": "chat", + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/batch", + "/v1/responses" + ], + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "text" + ], + "supports_pdf_input": true, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_response_schema": true, + "supports_vision": true, + "supports_prompt_caching": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_native_streaming": true, + "supports_reasoning": true + }, + "azure/gpt-5-nano": { + "max_tokens": 128000, + "max_input_tokens": 272000, + "max_output_tokens": 128000, + "input_cost_per_token": 5e-8, + "output_cost_per_token": 4e-7, + "cache_read_input_token_cost": 5e-9, + "litellm_provider": "azure", + "mode": "chat", + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/batch", + "/v1/responses" + ], + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "text" + ], + "supports_pdf_input": true, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_response_schema": true, + "supports_vision": true, + "supports_prompt_caching": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_native_streaming": true, + "supports_reasoning": true + }, + "azure/gpt-5-chat": { + "max_tokens": 128000, + "max_input_tokens": 400000, + "max_output_tokens": 128000, + "input_cost_per_token": 0.00000125, + "output_cost_per_token": 0.00001, + "cache_read_input_token_cost": 1.25e-7, + "litellm_provider": "azure", + "mode": "chat", + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/batch", + "/v1/responses" + ], + 
"supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "text" + ], + "supports_pdf_input": true, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_response_schema": true, + "supports_vision": true, + "supports_prompt_caching": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_native_streaming": true, + "supports_reasoning": true, + "source": "https://azure.microsoft.com/en-us/blog/gpt-5-in-azure-ai-foundry-the-future-of-ai-apps-and-agents-starts-here/" + }, + "azure/gpt-5-chat-latest": { + "max_tokens": 128000, + "max_input_tokens": 400000, + "max_output_tokens": 128000, + "input_cost_per_token": 0.00000125, + "output_cost_per_token": 0.00001, + "cache_read_input_token_cost": 1.25e-7, + "litellm_provider": "azure", + "mode": "chat", + "supported_endpoints": [ + "/v1/chat/completions", + "/v1/batch", + "/v1/responses" + ], + "supported_modalities": [ + "text", + "image" + ], + "supported_output_modalities": [ + "text" + ], + "supports_pdf_input": true, + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_response_schema": true, + "supports_vision": true, + "supports_prompt_caching": true, + "supports_system_messages": true, + "supports_tool_choice": true, + "supports_native_streaming": true, + "supports_reasoning": true + }, "azure/gpt-4o-mini-tts": { "mode": "audio_speech", - "input_cost_per_token": 2.5e-06, - "output_cost_per_token": 1e-05, - "output_cost_per_audio_token": 1.2e-05, + "input_cost_per_token": 0.0000025, + "output_cost_per_token": 0.00001, + "output_cost_per_audio_token": 0.000012, "output_cost_per_second": 0.00025, "litellm_provider": "azure", "supported_modalities": [ @@ -2029,8 +2542,8 @@ "max_tokens": 1024, "max_input_tokens": 8192, "max_output_tokens": 1024, - "input_cost_per_token": 3e-06, - "output_cost_per_token": 1.2e-05, + "input_cost_per_token": 0.000003, + "output_cost_per_token": 
0.000012, "litellm_provider": "azure", "mode": "chat", "supported_endpoints": [ @@ -2056,10 +2569,10 @@ "max_tokens": 16384, "max_input_tokens": 128000, "max_output_tokens": 16384, - "input_cost_per_token": 2.5e-06, - "input_cost_per_audio_token": 4e-05, - "output_cost_per_token": 1e-05, - "output_cost_per_audio_token": 8e-05, + "input_cost_per_token": 0.0000025, + "input_cost_per_audio_token": 0.00004, + "output_cost_per_token": 0.00001, + "output_cost_per_audio_token": 0.00008, "litellm_provider": "azure", "mode": "chat", "supported_endpoints": [ @@ -2087,10 +2600,10 @@ "max_tokens": 16384, "max_input_tokens": 128000, "max_output_tokens": 16384, - "input_cost_per_token": 2.5e-06, - "input_cost_per_audio_token": 4e-05, - "output_cost_per_token": 1e-05, - "output_cost_per_audio_token": 8e-05, + "input_cost_per_token": 0.0000025, + "input_cost_per_audio_token": 0.00004, + "output_cost_per_token": 0.00001, + "output_cost_per_audio_token": 0.00008, "litellm_provider": "azure", "mode": "chat", "supported_endpoints": [ @@ -2118,11 +2631,11 @@ "max_tokens": 32768, "max_input_tokens": 1047576, "max_output_tokens": 32768, - "input_cost_per_token": 2e-06, - "output_cost_per_token": 8e-06, - "input_cost_per_token_batches": 1e-06, - "output_cost_per_token_batches": 4e-06, - "cache_read_input_token_cost": 5e-07, + "input_cost_per_token": 0.000002, + "output_cost_per_token": 0.000008, + "input_cost_per_token_batches": 0.000001, + "output_cost_per_token_batches": 0.000004, + "cache_read_input_token_cost": 5e-7, "litellm_provider": "azure", "mode": "chat", "supported_endpoints": [ @@ -2156,11 +2669,11 @@ "max_tokens": 32768, "max_input_tokens": 1047576, "max_output_tokens": 32768, - "input_cost_per_token": 2e-06, - "output_cost_per_token": 8e-06, - "input_cost_per_token_batches": 1e-06, - "output_cost_per_token_batches": 4e-06, - "cache_read_input_token_cost": 5e-07, + "input_cost_per_token": 0.000002, + "output_cost_per_token": 0.000008, + "input_cost_per_token_batches": 
0.000001, + "output_cost_per_token_batches": 0.000004, + "cache_read_input_token_cost": 5e-7, "litellm_provider": "azure", "mode": "chat", "supported_endpoints": [ @@ -2194,11 +2707,11 @@ "max_tokens": 32768, "max_input_tokens": 1047576, "max_output_tokens": 32768, - "input_cost_per_token": 4e-07, - "output_cost_per_token": 1.6e-06, - "input_cost_per_token_batches": 2e-07, - "output_cost_per_token_batches": 8e-07, - "cache_read_input_token_cost": 1e-07, + "input_cost_per_token": 4e-7, + "output_cost_per_token": 0.0000016, + "input_cost_per_token_batches": 2e-7, + "output_cost_per_token_batches": 8e-7, + "cache_read_input_token_cost": 1e-7, "litellm_provider": "azure", "mode": "chat", "supported_endpoints": [ @@ -2232,11 +2745,11 @@ "max_tokens": 32768, "max_input_tokens": 1047576, "max_output_tokens": 32768, - "input_cost_per_token": 4e-07, - "output_cost_per_token": 1.6e-06, - "input_cost_per_token_batches": 2e-07, - "output_cost_per_token_batches": 8e-07, - "cache_read_input_token_cost": 1e-07, + "input_cost_per_token": 4e-7, + "output_cost_per_token": 0.0000016, + "input_cost_per_token_batches": 2e-7, + "output_cost_per_token_batches": 8e-7, + "cache_read_input_token_cost": 1e-7, "litellm_provider": "azure", "mode": "chat", "supported_endpoints": [ @@ -2270,11 +2783,11 @@ "max_tokens": 32768, "max_input_tokens": 1047576, "max_output_tokens": 32768, - "input_cost_per_token": 1e-07, - "output_cost_per_token": 4e-07, - "input_cost_per_token_batches": 5e-08, - "output_cost_per_token_batches": 2e-07, - "cache_read_input_token_cost": 2.5e-08, + "input_cost_per_token": 1e-7, + "output_cost_per_token": 4e-7, + "input_cost_per_token_batches": 5e-8, + "output_cost_per_token_batches": 2e-7, + "cache_read_input_token_cost": 2.5e-8, "litellm_provider": "azure", "mode": "chat", "supported_endpoints": [ @@ -2302,11 +2815,11 @@ "max_tokens": 32768, "max_input_tokens": 1047576, "max_output_tokens": 32768, - "input_cost_per_token": 1e-07, - "output_cost_per_token": 4e-07, - 
"input_cost_per_token_batches": 5e-08, - "output_cost_per_token_batches": 2e-07, - "cache_read_input_token_cost": 2.5e-08, + "input_cost_per_token": 1e-7, + "output_cost_per_token": 4e-7, + "input_cost_per_token_batches": 5e-8, + "output_cost_per_token_batches": 2e-7, + "cache_read_input_token_cost": 2.5e-8, "litellm_provider": "azure", "mode": "chat", "supported_endpoints": [ @@ -2334,10 +2847,10 @@ "max_tokens": 100000, "max_input_tokens": 200000, "max_output_tokens": 100000, - "input_cost_per_token": 2e-05, - "output_cost_per_token": 8e-05, - "input_cost_per_token_batches": 1e-05, - "output_cost_per_token_batches": 4e-05, + "input_cost_per_token": 0.00002, + "output_cost_per_token": 0.00008, + "input_cost_per_token_batches": 0.00001, + "output_cost_per_token_batches": 0.00004, "litellm_provider": "azure", "mode": "responses", "supported_endpoints": [ @@ -2364,10 +2877,10 @@ "max_tokens": 100000, "max_input_tokens": 200000, "max_output_tokens": 100000, - "input_cost_per_token": 2e-05, - "output_cost_per_token": 8e-05, - "input_cost_per_token_batches": 1e-05, - "output_cost_per_token_batches": 4e-05, + "input_cost_per_token": 0.00002, + "output_cost_per_token": 0.00008, + "input_cost_per_token_batches": 0.00001, + "output_cost_per_token_batches": 0.00004, "litellm_provider": "azure", "mode": "responses", "supported_endpoints": [ @@ -2394,9 +2907,9 @@ "max_tokens": 100000, "max_input_tokens": 200000, "max_output_tokens": 100000, - "input_cost_per_token": 2e-06, - "output_cost_per_token": 8e-06, - "cache_read_input_token_cost": 5e-07, + "input_cost_per_token": 0.000002, + "output_cost_per_token": 0.000008, + "cache_read_input_token_cost": 5e-7, "litellm_provider": "azure", "mode": "chat", "supported_endpoints": [ @@ -2423,9 +2936,9 @@ "max_tokens": 100000, "max_input_tokens": 200000, "max_output_tokens": 100000, - "input_cost_per_token": 1e-05, - "output_cost_per_token": 4e-05, - "cache_read_input_token_cost": 2.5e-06, + "input_cost_per_token": 0.00001, + 
"output_cost_per_token": 0.00004, + "cache_read_input_token_cost": 0.0000025, "litellm_provider": "azure", "mode": "chat", "supported_endpoints": [ @@ -2452,9 +2965,9 @@ "max_tokens": 100000, "max_input_tokens": 200000, "max_output_tokens": 100000, - "input_cost_per_token": 1e-05, - "output_cost_per_token": 4e-05, - "cache_read_input_token_cost": 2.5e-06, + "input_cost_per_token": 0.00001, + "output_cost_per_token": 0.00004, + "cache_read_input_token_cost": 0.0000025, "litellm_provider": "azure", "mode": "responses", "supported_endpoints": [ @@ -2484,9 +2997,9 @@ "max_tokens": 100000, "max_input_tokens": 200000, "max_output_tokens": 100000, - "input_cost_per_token": 1.1e-06, - "output_cost_per_token": 4.4e-06, - "cache_read_input_token_cost": 2.75e-07, + "input_cost_per_token": 0.0000011, + "output_cost_per_token": 0.0000044, + "cache_read_input_token_cost": 2.75e-7, "litellm_provider": "azure", "mode": "chat", "supported_endpoints": [ @@ -2513,12 +3026,12 @@ "max_tokens": 4096, "max_input_tokens": 128000, "max_output_tokens": 4096, - "input_cost_per_token": 6e-07, - "input_cost_per_audio_token": 1e-05, - "cache_read_input_token_cost": 3e-07, - "cache_creation_input_audio_token_cost": 3e-07, - "output_cost_per_token": 2.4e-06, - "output_cost_per_audio_token": 2e-05, + "input_cost_per_token": 6e-7, + "input_cost_per_audio_token": 0.00001, + "cache_read_input_token_cost": 3e-7, + "cache_creation_input_audio_token_cost": 3e-7, + "output_cost_per_token": 0.0000024, + "output_cost_per_audio_token": 0.00002, "litellm_provider": "azure", "mode": "chat", "supports_function_calling": true, @@ -2532,12 +3045,12 @@ "max_tokens": 4096, "max_input_tokens": 128000, "max_output_tokens": 4096, - "input_cost_per_token": 6.6e-07, - "input_cost_per_audio_token": 1.1e-05, - "cache_read_input_token_cost": 3.3e-07, - "cache_creation_input_audio_token_cost": 3.3e-07, - "output_cost_per_token": 2.64e-06, - "output_cost_per_audio_token": 2.2e-05, + "input_cost_per_token": 6.6e-7, + 
"input_cost_per_audio_token": 0.000011, + "cache_read_input_token_cost": 3.3e-7, + "cache_creation_input_audio_token_cost": 3.3e-7, + "output_cost_per_token": 0.00000264, + "output_cost_per_audio_token": 0.000022, "litellm_provider": "azure", "mode": "chat", "supports_function_calling": true, @@ -2551,12 +3064,12 @@ "max_tokens": 4096, "max_input_tokens": 128000, "max_output_tokens": 4096, - "input_cost_per_token": 6.6e-07, - "input_cost_per_audio_token": 1.1e-05, - "cache_read_input_token_cost": 3.3e-07, - "cache_creation_input_audio_token_cost": 3.3e-07, - "output_cost_per_token": 2.64e-06, - "output_cost_per_audio_token": 2.2e-05, + "input_cost_per_token": 6.6e-7, + "input_cost_per_audio_token": 0.000011, + "cache_read_input_token_cost": 3.3e-7, + "cache_creation_input_audio_token_cost": 3.3e-7, + "output_cost_per_token": 0.00000264, + "output_cost_per_audio_token": 0.000022, "litellm_provider": "azure", "mode": "chat", "supports_function_calling": true, @@ -2570,11 +3083,11 @@ "max_tokens": 4096, "max_input_tokens": 128000, "max_output_tokens": 4096, - "input_cost_per_token": 5e-06, - "input_cost_per_audio_token": 4e-05, - "cache_read_input_token_cost": 2.5e-06, - "output_cost_per_token": 2e-05, - "output_cost_per_audio_token": 8e-05, + "input_cost_per_token": 0.000005, + "input_cost_per_audio_token": 0.00004, + "cache_read_input_token_cost": 0.0000025, + "output_cost_per_token": 0.00002, + "output_cost_per_audio_token": 0.00008, "litellm_provider": "azure", "mode": "chat", "supported_modalities": [ @@ -2596,12 +3109,12 @@ "max_tokens": 4096, "max_input_tokens": 128000, "max_output_tokens": 4096, - "input_cost_per_token": 5.5e-06, - "input_cost_per_audio_token": 4.4e-05, - "cache_read_input_token_cost": 2.75e-06, - "cache_read_input_audio_token_cost": 2.5e-06, - "output_cost_per_token": 2.2e-05, - "output_cost_per_audio_token": 8e-05, + "input_cost_per_token": 0.0000055, + "input_cost_per_audio_token": 0.000044, + "cache_read_input_token_cost": 0.00000275, + 
"cache_read_input_audio_token_cost": 0.0000025, + "output_cost_per_token": 0.000022, + "output_cost_per_audio_token": 0.00008, "litellm_provider": "azure", "mode": "chat", "supported_modalities": [ @@ -2623,12 +3136,12 @@ "max_tokens": 4096, "max_input_tokens": 128000, "max_output_tokens": 4096, - "input_cost_per_token": 5.5e-06, - "input_cost_per_audio_token": 4.4e-05, - "cache_read_input_token_cost": 2.75e-06, - "cache_read_input_audio_token_cost": 2.5e-06, - "output_cost_per_token": 2.2e-05, - "output_cost_per_audio_token": 8e-05, + "input_cost_per_token": 0.0000055, + "input_cost_per_audio_token": 0.000044, + "cache_read_input_token_cost": 0.00000275, + "cache_read_input_audio_token_cost": 0.0000025, + "output_cost_per_token": 0.000022, + "output_cost_per_audio_token": 0.00008, "litellm_provider": "azure", "mode": "chat", "supported_modalities": [ @@ -2650,11 +3163,11 @@ "max_tokens": 4096, "max_input_tokens": 128000, "max_output_tokens": 4096, - "input_cost_per_token": 5e-06, + "input_cost_per_token": 0.000005, "input_cost_per_audio_token": 0.0001, - "cache_read_input_token_cost": 2.5e-06, - "cache_creation_input_audio_token_cost": 2e-05, - "output_cost_per_token": 2e-05, + "cache_read_input_token_cost": 0.0000025, + "cache_creation_input_audio_token_cost": 0.00002, + "output_cost_per_token": 0.00002, "output_cost_per_audio_token": 0.0002, "litellm_provider": "azure", "mode": "chat", @@ -2669,11 +3182,11 @@ "max_tokens": 4096, "max_input_tokens": 128000, "max_output_tokens": 4096, - "input_cost_per_token": 5.5e-06, + "input_cost_per_token": 0.0000055, "input_cost_per_audio_token": 0.00011, - "cache_read_input_token_cost": 2.75e-06, - "cache_creation_input_audio_token_cost": 2.2e-05, - "output_cost_per_token": 2.2e-05, + "cache_read_input_token_cost": 0.00000275, + "cache_creation_input_audio_token_cost": 0.000022, + "output_cost_per_token": 0.000022, "output_cost_per_audio_token": 0.00022, "litellm_provider": "azure", "mode": "chat", @@ -2688,11 +3201,11 @@ 
"max_tokens": 4096, "max_input_tokens": 128000, "max_output_tokens": 4096, - "input_cost_per_token": 5.5e-06, + "input_cost_per_token": 0.0000055, "input_cost_per_audio_token": 0.00011, - "cache_read_input_token_cost": 2.75e-06, - "cache_creation_input_audio_token_cost": 2.2e-05, - "output_cost_per_token": 2.2e-05, + "cache_read_input_token_cost": 0.00000275, + "cache_creation_input_audio_token_cost": 0.000022, + "output_cost_per_token": 0.000022, "output_cost_per_audio_token": 0.00022, "litellm_provider": "azure", "mode": "chat", @@ -2707,9 +3220,9 @@ "max_tokens": 100000, "max_input_tokens": 200000, "max_output_tokens": 100000, - "input_cost_per_token": 1.1e-06, - "output_cost_per_token": 4.4e-06, - "cache_read_input_token_cost": 2.75e-07, + "input_cost_per_token": 0.0000011, + "output_cost_per_token": 0.0000044, + "cache_read_input_token_cost": 2.75e-7, "litellm_provider": "azure", "mode": "chat", "supports_function_calling": true, @@ -2724,9 +3237,9 @@ "max_tokens": 100000, "max_input_tokens": 200000, "max_output_tokens": 100000, - "input_cost_per_token": 1.1e-06, - "output_cost_per_token": 4.4e-06, - "cache_read_input_token_cost": 5.5e-07, + "input_cost_per_token": 0.0000011, + "output_cost_per_token": 0.0000044, + "cache_read_input_token_cost": 5.5e-7, "litellm_provider": "azure", "mode": "chat", "supports_reasoning": true, @@ -2738,11 +3251,11 @@ "max_tokens": 100000, "max_input_tokens": 200000, "max_output_tokens": 100000, - "input_cost_per_token": 1.21e-06, - "input_cost_per_token_batches": 6.05e-07, - "output_cost_per_token": 4.84e-06, - "output_cost_per_token_batches": 2.42e-06, - "cache_read_input_token_cost": 6.05e-07, + "input_cost_per_token": 0.00000121, + "input_cost_per_token_batches": 6.05e-7, + "output_cost_per_token": 0.00000484, + "output_cost_per_token_batches": 0.00000242, + "cache_read_input_token_cost": 6.05e-7, "litellm_provider": "azure", "mode": "chat", "supports_vision": false, @@ -2754,11 +3267,11 @@ "max_tokens": 100000, 
"max_input_tokens": 200000, "max_output_tokens": 100000, - "input_cost_per_token": 1.21e-06, - "input_cost_per_token_batches": 6.05e-07, - "output_cost_per_token": 4.84e-06, - "output_cost_per_token_batches": 2.42e-06, - "cache_read_input_token_cost": 6.05e-07, + "input_cost_per_token": 0.00000121, + "input_cost_per_token_batches": 6.05e-7, + "output_cost_per_token": 0.00000484, + "output_cost_per_token_batches": 0.00000242, + "cache_read_input_token_cost": 6.05e-7, "litellm_provider": "azure", "mode": "chat", "supports_vision": false, @@ -2768,12 +3281,12 @@ }, "azure/tts-1": { "mode": "audio_speech", - "input_cost_per_character": 1.5e-05, + "input_cost_per_character": 0.000015, "litellm_provider": "azure" }, "azure/tts-1-hd": { "mode": "audio_speech", - "input_cost_per_character": 3e-05, + "input_cost_per_character": 0.00003, "litellm_provider": "azure" }, "azure/whisper-1": { @@ -2786,9 +3299,9 @@ "mode": "audio_transcription", "max_input_tokens": 16000, "max_output_tokens": 2000, - "input_cost_per_token": 2.5e-06, - "input_cost_per_audio_token": 6e-06, - "output_cost_per_token": 1e-05, + "input_cost_per_token": 0.0000025, + "input_cost_per_audio_token": 0.000006, + "output_cost_per_token": 0.00001, "litellm_provider": "azure", "supported_endpoints": [ "/v1/audio/transcriptions" @@ -2798,9 +3311,9 @@ "mode": "audio_transcription", "max_input_tokens": 16000, "max_output_tokens": 2000, - "input_cost_per_token": 1.25e-06, - "input_cost_per_audio_token": 3e-06, - "output_cost_per_token": 5e-06, + "input_cost_per_token": 0.00000125, + "input_cost_per_audio_token": 0.000003, + "output_cost_per_token": 0.000005, "litellm_provider": "azure", "supported_endpoints": [ "/v1/audio/transcriptions" @@ -2810,9 +3323,9 @@ "max_tokens": 100000, "max_input_tokens": 200000, "max_output_tokens": 100000, - "input_cost_per_token": 1.1e-06, - "output_cost_per_token": 4.4e-06, - "cache_read_input_token_cost": 5.5e-07, + "input_cost_per_token": 0.0000011, + "output_cost_per_token": 
0.0000044, + "cache_read_input_token_cost": 5.5e-7, "litellm_provider": "azure", "mode": "chat", "supports_vision": false, @@ -2825,9 +3338,9 @@ "max_tokens": 65536, "max_input_tokens": 128000, "max_output_tokens": 65536, - "input_cost_per_token": 1.21e-06, - "output_cost_per_token": 4.84e-06, - "cache_read_input_token_cost": 6.05e-07, + "input_cost_per_token": 0.00000121, + "output_cost_per_token": 0.00000484, + "cache_read_input_token_cost": 6.05e-7, "litellm_provider": "azure", "mode": "chat", "supports_function_calling": true, @@ -2840,9 +3353,9 @@ "max_tokens": 65536, "max_input_tokens": 128000, "max_output_tokens": 65536, - "input_cost_per_token": 1.1e-06, - "output_cost_per_token": 4.4e-06, - "cache_read_input_token_cost": 5.5e-07, + "input_cost_per_token": 0.0000011, + "output_cost_per_token": 0.0000044, + "cache_read_input_token_cost": 5.5e-7, "litellm_provider": "azure", "mode": "chat", "supports_function_calling": true, @@ -2855,11 +3368,11 @@ "max_tokens": 65536, "max_input_tokens": 128000, "max_output_tokens": 65536, - "input_cost_per_token": 1.21e-06, - "input_cost_per_token_batches": 6.05e-07, - "output_cost_per_token": 4.84e-06, - "output_cost_per_token_batches": 2.42e-06, - "cache_read_input_token_cost": 6.05e-07, + "input_cost_per_token": 0.00000121, + "input_cost_per_token_batches": 6.05e-7, + "output_cost_per_token": 0.00000484, + "output_cost_per_token_batches": 0.00000242, + "cache_read_input_token_cost": 6.05e-7, "litellm_provider": "azure", "mode": "chat", "supports_function_calling": true, @@ -2871,11 +3384,11 @@ "max_tokens": 65536, "max_input_tokens": 128000, "max_output_tokens": 65536, - "input_cost_per_token": 1.21e-06, - "input_cost_per_token_batches": 6.05e-07, - "output_cost_per_token": 4.84e-06, - "output_cost_per_token_batches": 2.42e-06, - "cache_read_input_token_cost": 6.05e-07, + "input_cost_per_token": 0.00000121, + "input_cost_per_token_batches": 6.05e-7, + "output_cost_per_token": 0.00000484, + 
"output_cost_per_token_batches": 0.00000242, + "cache_read_input_token_cost": 6.05e-7, "litellm_provider": "azure", "mode": "chat", "supports_function_calling": true, @@ -2887,9 +3400,9 @@ "max_tokens": 100000, "max_input_tokens": 200000, "max_output_tokens": 100000, - "input_cost_per_token": 1.5e-05, - "output_cost_per_token": 6e-05, - "cache_read_input_token_cost": 7.5e-06, + "input_cost_per_token": 0.000015, + "output_cost_per_token": 0.00006, + "cache_read_input_token_cost": 0.0000075, "litellm_provider": "azure", "mode": "chat", "supports_function_calling": true, @@ -2903,9 +3416,9 @@ "max_tokens": 100000, "max_input_tokens": 200000, "max_output_tokens": 100000, - "input_cost_per_token": 1.5e-05, - "output_cost_per_token": 6e-05, - "cache_read_input_token_cost": 7.5e-06, + "input_cost_per_token": 0.000015, + "output_cost_per_token": 0.00006, + "cache_read_input_token_cost": 0.0000075, "litellm_provider": "azure", "mode": "chat", "supports_function_calling": true, @@ -2919,9 +3432,9 @@ "max_tokens": 100000, "max_input_tokens": 200000, "max_output_tokens": 100000, - "input_cost_per_token": 1.65e-05, - "output_cost_per_token": 6.6e-05, - "cache_read_input_token_cost": 8.25e-06, + "input_cost_per_token": 0.0000165, + "output_cost_per_token": 0.000066, + "cache_read_input_token_cost": 0.00000825, "litellm_provider": "azure", "mode": "chat", "supports_function_calling": true, @@ -2934,9 +3447,9 @@ "max_tokens": 100000, "max_input_tokens": 200000, "max_output_tokens": 100000, - "input_cost_per_token": 1.65e-05, - "output_cost_per_token": 6.6e-05, - "cache_read_input_token_cost": 8.25e-06, + "input_cost_per_token": 0.0000165, + "output_cost_per_token": 0.000066, + "cache_read_input_token_cost": 0.00000825, "litellm_provider": "azure", "mode": "chat", "supports_function_calling": true, @@ -2949,9 +3462,9 @@ "max_tokens": 100000, "max_input_tokens": 200000, "max_output_tokens": 100000, - "input_cost_per_token": 1.5e-06, - "output_cost_per_token": 6e-06, - 
"cache_read_input_token_cost": 3.75e-07, + "input_cost_per_token": 0.0000015, + "output_cost_per_token": 0.000006, + "cache_read_input_token_cost": 3.75e-7, "litellm_provider": "azure", "mode": "responses", "supports_pdf_input": true, @@ -2978,9 +3491,9 @@ "max_tokens": 32768, "max_input_tokens": 128000, "max_output_tokens": 32768, - "input_cost_per_token": 1.5e-05, - "output_cost_per_token": 6e-05, - "cache_read_input_token_cost": 7.5e-06, + "input_cost_per_token": 0.000015, + "output_cost_per_token": 0.00006, + "cache_read_input_token_cost": 0.0000075, "litellm_provider": "azure", "mode": "chat", "supports_function_calling": true, @@ -2993,9 +3506,9 @@ "max_tokens": 32768, "max_input_tokens": 128000, "max_output_tokens": 32768, - "input_cost_per_token": 1.5e-05, - "output_cost_per_token": 6e-05, - "cache_read_input_token_cost": 7.5e-06, + "input_cost_per_token": 0.000015, + "output_cost_per_token": 0.00006, + "cache_read_input_token_cost": 0.0000075, "litellm_provider": "azure", "mode": "chat", "supports_pdf_input": true, @@ -3009,9 +3522,9 @@ "max_tokens": 32768, "max_input_tokens": 128000, "max_output_tokens": 32768, - "input_cost_per_token": 1.65e-05, - "output_cost_per_token": 6.6e-05, - "cache_read_input_token_cost": 8.25e-06, + "input_cost_per_token": 0.0000165, + "output_cost_per_token": 0.000066, + "cache_read_input_token_cost": 0.00000825, "litellm_provider": "azure", "mode": "chat", "supports_function_calling": true, @@ -3023,9 +3536,9 @@ "max_tokens": 32768, "max_input_tokens": 128000, "max_output_tokens": 32768, - "input_cost_per_token": 1.65e-05, - "output_cost_per_token": 6.6e-05, - "cache_read_input_token_cost": 8.25e-06, + "input_cost_per_token": 0.0000165, + "output_cost_per_token": 0.000066, + "cache_read_input_token_cost": 0.00000825, "litellm_provider": "azure", "mode": "chat", "supports_function_calling": true, @@ -3037,11 +3550,11 @@ "max_tokens": 16384, "max_input_tokens": 128000, "max_output_tokens": 16384, - "input_cost_per_token": 
7.5e-05, + "input_cost_per_token": 0.000075, "output_cost_per_token": 0.00015, - "input_cost_per_token_batches": 3.75e-05, - "output_cost_per_token_batches": 7.5e-05, - "cache_read_input_token_cost": 3.75e-05, + "input_cost_per_token_batches": 0.0000375, + "output_cost_per_token_batches": 0.000075, + "cache_read_input_token_cost": 0.0000375, "litellm_provider": "azure", "mode": "chat", "supports_function_calling": true, @@ -3056,9 +3569,9 @@ "max_tokens": 16384, "max_input_tokens": 128000, "max_output_tokens": 16384, - "input_cost_per_token": 2.5e-06, - "output_cost_per_token": 1e-05, - "cache_read_input_token_cost": 1.25e-06, + "input_cost_per_token": 0.0000025, + "output_cost_per_token": 0.00001, + "cache_read_input_token_cost": 0.00000125, "litellm_provider": "azure", "mode": "chat", "supports_function_calling": true, @@ -3072,9 +3585,9 @@ "max_tokens": 16384, "max_input_tokens": 128000, "max_output_tokens": 16384, - "input_cost_per_token": 2.5e-06, - "output_cost_per_token": 1e-05, - "cache_read_input_token_cost": 1.25e-06, + "input_cost_per_token": 0.0000025, + "output_cost_per_token": 0.00001, + "cache_read_input_token_cost": 0.00000125, "litellm_provider": "azure", "mode": "chat", "supports_function_calling": true, @@ -3088,9 +3601,9 @@ "max_tokens": 16384, "max_input_tokens": 128000, "max_output_tokens": 16384, - "input_cost_per_token": 2.5e-06, - "output_cost_per_token": 1e-05, - "cache_read_input_token_cost": 1.25e-06, + "input_cost_per_token": 0.0000025, + "output_cost_per_token": 0.00001, + "cache_read_input_token_cost": 0.00000125, "litellm_provider": "azure", "mode": "chat", "supports_function_calling": true, @@ -3104,9 +3617,9 @@ "max_tokens": 16384, "max_input_tokens": 128000, "max_output_tokens": 16384, - "input_cost_per_token": 2.5e-06, - "output_cost_per_token": 1e-05, - "cache_read_input_token_cost": 1.25e-06, + "input_cost_per_token": 0.0000025, + "output_cost_per_token": 0.00001, + "cache_read_input_token_cost": 0.00000125, "litellm_provider": 
"azure", "mode": "chat", "supports_function_calling": true, @@ -3120,9 +3633,9 @@ "max_tokens": 16384, "max_input_tokens": 128000, "max_output_tokens": 16384, - "input_cost_per_token": 2.75e-06, - "output_cost_per_token": 1.1e-05, - "cache_read_input_token_cost": 1.25e-06, + "input_cost_per_token": 0.00000275, + "output_cost_per_token": 0.000011, + "cache_read_input_token_cost": 0.00000125, "litellm_provider": "azure", "mode": "chat", "supports_function_calling": true, @@ -3136,9 +3649,9 @@ "max_tokens": 16384, "max_input_tokens": 128000, "max_output_tokens": 16384, - "input_cost_per_token": 2.75e-06, - "cache_creation_input_token_cost": 1.38e-06, - "output_cost_per_token": 1.1e-05, + "input_cost_per_token": 0.00000275, + "cache_creation_input_token_cost": 0.00000138, + "output_cost_per_token": 0.000011, "litellm_provider": "azure", "mode": "chat", "supports_function_calling": true, @@ -3151,9 +3664,9 @@ "max_tokens": 16384, "max_input_tokens": 128000, "max_output_tokens": 16384, - "input_cost_per_token": 2.75e-06, - "cache_creation_input_token_cost": 1.38e-06, - "output_cost_per_token": 1.1e-05, + "input_cost_per_token": 0.00000275, + "cache_creation_input_token_cost": 0.00000138, + "output_cost_per_token": 0.000011, "litellm_provider": "azure", "mode": "chat", "supports_function_calling": true, @@ -3166,8 +3679,8 @@ "max_tokens": 4096, "max_input_tokens": 128000, "max_output_tokens": 4096, - "input_cost_per_token": 5e-06, - "output_cost_per_token": 1.5e-05, + "input_cost_per_token": 0.000005, + "output_cost_per_token": 0.000015, "litellm_provider": "azure", "mode": "chat", "supports_function_calling": true, @@ -3180,9 +3693,9 @@ "max_tokens": 16384, "max_input_tokens": 128000, "max_output_tokens": 16384, - "input_cost_per_token": 2.5e-06, - "output_cost_per_token": 1e-05, - "cache_read_input_token_cost": 1.25e-06, + "input_cost_per_token": 0.0000025, + "output_cost_per_token": 0.00001, + "cache_read_input_token_cost": 0.00000125, "litellm_provider": "azure", 
"mode": "chat", "supports_function_calling": true, @@ -3197,9 +3710,9 @@ "max_tokens": 16384, "max_input_tokens": 128000, "max_output_tokens": 16384, - "input_cost_per_token": 2.75e-06, - "output_cost_per_token": 1.1e-05, - "cache_read_input_token_cost": 1.375e-06, + "input_cost_per_token": 0.00000275, + "output_cost_per_token": 0.000011, + "cache_read_input_token_cost": 0.000001375, "litellm_provider": "azure", "mode": "chat", "supports_function_calling": true, @@ -3213,9 +3726,9 @@ "max_tokens": 16384, "max_input_tokens": 128000, "max_output_tokens": 16384, - "input_cost_per_token": 2.75e-06, - "output_cost_per_token": 1.1e-05, - "cache_read_input_token_cost": 1.375e-06, + "input_cost_per_token": 0.00000275, + "output_cost_per_token": 0.000011, + "cache_read_input_token_cost": 0.000001375, "litellm_provider": "azure", "mode": "chat", "supports_function_calling": true, @@ -3229,9 +3742,9 @@ "max_tokens": 16384, "max_input_tokens": 128000, "max_output_tokens": 16384, - "input_cost_per_token": 2.5e-06, - "output_cost_per_token": 1e-05, - "cache_read_input_token_cost": 1.25e-06, + "input_cost_per_token": 0.0000025, + "output_cost_per_token": 0.00001, + "cache_read_input_token_cost": 0.00000125, "litellm_provider": "azure", "mode": "chat", "supports_function_calling": true, @@ -3245,8 +3758,8 @@ "max_tokens": 16384, "max_input_tokens": 128000, "max_output_tokens": 16384, - "input_cost_per_token": 1.5e-07, - "output_cost_per_token": 6e-07, + "input_cost_per_token": 1.5e-7, + "output_cost_per_token": 6e-7, "litellm_provider": "azure", "mode": "chat", "supports_function_calling": true, @@ -3259,9 +3772,9 @@ "max_tokens": 16384, "max_input_tokens": 128000, "max_output_tokens": 16384, - "input_cost_per_token": 1.65e-07, - "output_cost_per_token": 6.6e-07, - "cache_read_input_token_cost": 7.5e-08, + "input_cost_per_token": 1.65e-7, + "output_cost_per_token": 6.6e-7, + "cache_read_input_token_cost": 7.5e-8, "litellm_provider": "azure", "mode": "chat", 
"supports_function_calling": true, @@ -3275,9 +3788,9 @@ "max_tokens": 16384, "max_input_tokens": 128000, "max_output_tokens": 16384, - "input_cost_per_token": 1.65e-07, - "output_cost_per_token": 6.6e-07, - "cache_read_input_token_cost": 7.5e-08, + "input_cost_per_token": 1.65e-7, + "output_cost_per_token": 6.6e-7, + "cache_read_input_token_cost": 7.5e-8, "litellm_provider": "azure", "mode": "chat", "supports_function_calling": true, @@ -3291,9 +3804,9 @@ "max_tokens": 16384, "max_input_tokens": 128000, "max_output_tokens": 16384, - "input_cost_per_token": 1.65e-07, - "output_cost_per_token": 6.6e-07, - "cache_read_input_token_cost": 8.3e-08, + "input_cost_per_token": 1.65e-7, + "output_cost_per_token": 6.6e-7, + "cache_read_input_token_cost": 8.3e-8, "litellm_provider": "azure", "mode": "chat", "supports_function_calling": true, @@ -3307,9 +3820,9 @@ "max_tokens": 16384, "max_input_tokens": 128000, "max_output_tokens": 16384, - "input_cost_per_token": 1.65e-07, - "output_cost_per_token": 6.6e-07, - "cache_read_input_token_cost": 8.3e-08, + "input_cost_per_token": 1.65e-7, + "output_cost_per_token": 6.6e-7, + "cache_read_input_token_cost": 8.3e-8, "litellm_provider": "azure", "mode": "chat", "supports_function_calling": true, @@ -3323,8 +3836,8 @@ "max_tokens": 4096, "max_input_tokens": 128000, "max_output_tokens": 4096, - "input_cost_per_token": 1e-05, - "output_cost_per_token": 3e-05, + "input_cost_per_token": 0.00001, + "output_cost_per_token": 0.00003, "litellm_provider": "azure", "mode": "chat", "supports_function_calling": true, @@ -3336,8 +3849,8 @@ "max_tokens": 4096, "max_input_tokens": 128000, "max_output_tokens": 4096, - "input_cost_per_token": 1e-05, - "output_cost_per_token": 3e-05, + "input_cost_per_token": 0.00001, + "output_cost_per_token": 0.00003, "litellm_provider": "azure", "mode": "chat", "supports_function_calling": true, @@ -3348,8 +3861,8 @@ "max_tokens": 4096, "max_input_tokens": 128000, "max_output_tokens": 4096, - "input_cost_per_token": 
1e-05, - "output_cost_per_token": 3e-05, + "input_cost_per_token": 0.00001, + "output_cost_per_token": 0.00003, "litellm_provider": "azure", "mode": "chat", "supports_function_calling": true, @@ -3360,8 +3873,8 @@ "max_tokens": 4096, "max_input_tokens": 8192, "max_output_tokens": 4096, - "input_cost_per_token": 3e-05, - "output_cost_per_token": 6e-05, + "input_cost_per_token": 0.00003, + "output_cost_per_token": 0.00006, "litellm_provider": "azure", "mode": "chat", "supports_function_calling": true, @@ -3371,7 +3884,7 @@ "max_tokens": 4096, "max_input_tokens": 32768, "max_output_tokens": 4096, - "input_cost_per_token": 6e-05, + "input_cost_per_token": 0.00006, "output_cost_per_token": 0.00012, "litellm_provider": "azure", "mode": "chat", @@ -3381,7 +3894,7 @@ "max_tokens": 4096, "max_input_tokens": 32768, "max_output_tokens": 4096, - "input_cost_per_token": 6e-05, + "input_cost_per_token": 0.00006, "output_cost_per_token": 0.00012, "litellm_provider": "azure", "mode": "chat", @@ -3391,8 +3904,8 @@ "max_tokens": 4096, "max_input_tokens": 8192, "max_output_tokens": 4096, - "input_cost_per_token": 3e-05, - "output_cost_per_token": 6e-05, + "input_cost_per_token": 0.00003, + "output_cost_per_token": 0.00006, "litellm_provider": "azure", "mode": "chat", "supports_function_calling": true, @@ -3402,8 +3915,8 @@ "max_tokens": 4096, "max_input_tokens": 128000, "max_output_tokens": 4096, - "input_cost_per_token": 1e-05, - "output_cost_per_token": 3e-05, + "input_cost_per_token": 0.00001, + "output_cost_per_token": 0.00003, "litellm_provider": "azure", "mode": "chat", "supports_function_calling": true, @@ -3414,8 +3927,8 @@ "max_tokens": 4096, "max_input_tokens": 128000, "max_output_tokens": 4096, - "input_cost_per_token": 1e-05, - "output_cost_per_token": 3e-05, + "input_cost_per_token": 0.00001, + "output_cost_per_token": 0.00003, "litellm_provider": "azure", "mode": "chat", "supports_vision": true, @@ -3425,8 +3938,8 @@ "max_tokens": 4096, "max_input_tokens": 16385, 
"max_output_tokens": 4096, - "input_cost_per_token": 3e-06, - "output_cost_per_token": 4e-06, + "input_cost_per_token": 0.000003, + "output_cost_per_token": 0.000004, "litellm_provider": "azure", "mode": "chat", "supports_function_calling": true, @@ -3436,8 +3949,8 @@ "max_tokens": 4096, "max_input_tokens": 16384, "max_output_tokens": 4096, - "input_cost_per_token": 1e-06, - "output_cost_per_token": 2e-06, + "input_cost_per_token": 0.000001, + "output_cost_per_token": 0.000002, "litellm_provider": "azure", "mode": "chat", "supports_function_calling": true, @@ -3449,8 +3962,8 @@ "max_tokens": 4097, "max_input_tokens": 4097, "max_output_tokens": 4096, - "input_cost_per_token": 1.5e-06, - "output_cost_per_token": 2e-06, + "input_cost_per_token": 0.0000015, + "output_cost_per_token": 0.000002, "litellm_provider": "azure", "mode": "chat", "supports_function_calling": true, @@ -3462,8 +3975,8 @@ "max_tokens": 4097, "max_input_tokens": 4097, "max_output_tokens": 4096, - "input_cost_per_token": 2e-07, - "output_cost_per_token": 2e-06, + "input_cost_per_token": 2e-7, + "output_cost_per_token": 0.000002, "litellm_provider": "azure", "mode": "chat", "supports_function_calling": true, @@ -3475,8 +3988,8 @@ "max_tokens": 4096, "max_input_tokens": 16384, "max_output_tokens": 4096, - "input_cost_per_token": 5e-07, - "output_cost_per_token": 1.5e-06, + "input_cost_per_token": 5e-7, + "output_cost_per_token": 0.0000015, "litellm_provider": "azure", "mode": "chat", "supports_function_calling": true, @@ -3488,8 +4001,8 @@ "max_tokens": 4096, "max_input_tokens": 16384, "max_output_tokens": 4096, - "input_cost_per_token": 5e-07, - "output_cost_per_token": 1.5e-06, + "input_cost_per_token": 5e-7, + "output_cost_per_token": 0.0000015, "litellm_provider": "azure", "mode": "chat", "supports_function_calling": true, @@ -3501,8 +4014,8 @@ "max_tokens": 4096, "max_input_tokens": 16385, "max_output_tokens": 4096, - "input_cost_per_token": 3e-06, - "output_cost_per_token": 4e-06, + 
"input_cost_per_token": 0.000003, + "output_cost_per_token": 0.000004, "litellm_provider": "azure", "mode": "chat", "supports_tool_choice": true @@ -3511,8 +4024,8 @@ "max_tokens": 4096, "max_input_tokens": 4097, "max_output_tokens": 4096, - "input_cost_per_token": 5e-07, - "output_cost_per_token": 1.5e-06, + "input_cost_per_token": 5e-7, + "output_cost_per_token": 0.0000015, "litellm_provider": "azure", "mode": "chat", "supports_function_calling": true, @@ -3522,8 +4035,8 @@ "max_tokens": 4096, "max_input_tokens": 4097, "max_output_tokens": 4096, - "input_cost_per_token": 5e-07, - "output_cost_per_token": 1.5e-06, + "input_cost_per_token": 5e-7, + "output_cost_per_token": 0.0000015, "litellm_provider": "azure", "mode": "chat", "supports_function_calling": true, @@ -3532,32 +4045,32 @@ "azure/gpt-3.5-turbo-instruct-0914": { "max_tokens": 4097, "max_input_tokens": 4097, - "input_cost_per_token": 1.5e-06, - "output_cost_per_token": 2e-06, + "input_cost_per_token": 0.0000015, + "output_cost_per_token": 0.000002, "litellm_provider": "azure_text", "mode": "completion" }, "azure/gpt-35-turbo-instruct": { "max_tokens": 4097, "max_input_tokens": 4097, - "input_cost_per_token": 1.5e-06, - "output_cost_per_token": 2e-06, + "input_cost_per_token": 0.0000015, + "output_cost_per_token": 0.000002, "litellm_provider": "azure_text", "mode": "completion" }, "azure/gpt-35-turbo-instruct-0914": { "max_tokens": 4097, "max_input_tokens": 4097, - "input_cost_per_token": 1.5e-06, - "output_cost_per_token": 2e-06, + "input_cost_per_token": 0.0000015, + "output_cost_per_token": 0.000002, "litellm_provider": "azure_text", "mode": "completion" }, "azure/mistral-large-latest": { "max_tokens": 32000, "max_input_tokens": 32000, - "input_cost_per_token": 8e-06, - "output_cost_per_token": 2.4e-05, + "input_cost_per_token": 0.000008, + "output_cost_per_token": 0.000024, "litellm_provider": "azure", "mode": "chat", "supports_function_calling": true @@ -3565,8 +4078,8 @@ "azure/mistral-large-2402": 
{ "max_tokens": 32000, "max_input_tokens": 32000, - "input_cost_per_token": 8e-06, - "output_cost_per_token": 2.4e-05, + "input_cost_per_token": 0.000008, + "output_cost_per_token": 0.000024, "litellm_provider": "azure", "mode": "chat", "supports_function_calling": true @@ -3575,8 +4088,8 @@ "max_tokens": 4096, "max_input_tokens": 128000, "max_output_tokens": 4096, - "input_cost_per_token": 3e-06, - "output_cost_per_token": 1.5e-05, + "input_cost_per_token": 0.000003, + "output_cost_per_token": 0.000015, "litellm_provider": "azure", "mode": "chat", "supports_function_calling": true @@ -3584,39 +4097,39 @@ "azure/ada": { "max_tokens": 8191, "max_input_tokens": 8191, - "input_cost_per_token": 1e-07, - "output_cost_per_token": 0.0, + "input_cost_per_token": 1e-7, + "output_cost_per_token": 0, "litellm_provider": "azure", "mode": "embedding" }, "azure/text-embedding-ada-002": { "max_tokens": 8191, "max_input_tokens": 8191, - "input_cost_per_token": 1e-07, - "output_cost_per_token": 0.0, + "input_cost_per_token": 1e-7, + "output_cost_per_token": 0, "litellm_provider": "azure", "mode": "embedding" }, "azure/text-embedding-3-large": { "max_tokens": 8191, "max_input_tokens": 8191, - "input_cost_per_token": 1.3e-07, - "output_cost_per_token": 0.0, + "input_cost_per_token": 1.3e-7, + "output_cost_per_token": 0, "litellm_provider": "azure", "mode": "embedding" }, "azure/text-embedding-3-small": { "max_tokens": 8191, "max_input_tokens": 8191, - "input_cost_per_token": 2e-08, - "output_cost_per_token": 0.0, + "input_cost_per_token": 2e-8, + "output_cost_per_token": 0, "litellm_provider": "azure", "mode": "embedding" }, "azure/gpt-image-1": { "mode": "image_generation", - "input_cost_per_pixel": 4.0054321e-08, - "output_cost_per_pixel": 0.0, + "input_cost_per_pixel": 4.0054321e-8, + "output_cost_per_pixel": 0, "litellm_provider": "azure", "supported_endpoints": [ "/v1/images/generations" @@ -3624,8 +4137,8 @@ }, "azure/low/1024-x-1024/gpt-image-1": { "mode": "image_generation", 
- "input_cost_per_pixel": 1.0490417e-08, - "output_cost_per_pixel": 0.0, + "input_cost_per_pixel": 1.0490417e-8, + "output_cost_per_pixel": 0, "litellm_provider": "azure", "supported_endpoints": [ "/v1/images/generations" @@ -3633,8 +4146,8 @@ }, "azure/medium/1024-x-1024/gpt-image-1": { "mode": "image_generation", - "input_cost_per_pixel": 4.0054321e-08, - "output_cost_per_pixel": 0.0, + "input_cost_per_pixel": 4.0054321e-8, + "output_cost_per_pixel": 0, "litellm_provider": "azure", "supported_endpoints": [ "/v1/images/generations" @@ -3642,8 +4155,8 @@ }, "azure/high/1024-x-1024/gpt-image-1": { "mode": "image_generation", - "input_cost_per_pixel": 1.59263611e-07, - "output_cost_per_pixel": 0.0, + "input_cost_per_pixel": 1.59263611e-7, + "output_cost_per_pixel": 0, "litellm_provider": "azure", "supported_endpoints": [ "/v1/images/generations" @@ -3651,8 +4164,8 @@ }, "azure/low/1024-x-1536/gpt-image-1": { "mode": "image_generation", - "input_cost_per_pixel": 1.0172526e-08, - "output_cost_per_pixel": 0.0, + "input_cost_per_pixel": 1.0172526e-8, + "output_cost_per_pixel": 0, "litellm_provider": "azure", "supported_endpoints": [ "/v1/images/generations" @@ -3660,8 +4173,8 @@ }, "azure/medium/1024-x-1536/gpt-image-1": { "mode": "image_generation", - "input_cost_per_pixel": 4.0054321e-08, - "output_cost_per_pixel": 0.0, + "input_cost_per_pixel": 4.0054321e-8, + "output_cost_per_pixel": 0, "litellm_provider": "azure", "supported_endpoints": [ "/v1/images/generations" @@ -3669,8 +4182,8 @@ }, "azure/high/1024-x-1536/gpt-image-1": { "mode": "image_generation", - "input_cost_per_pixel": 1.58945719e-07, - "output_cost_per_pixel": 0.0, + "input_cost_per_pixel": 1.58945719e-7, + "output_cost_per_pixel": 0, "litellm_provider": "azure", "supported_endpoints": [ "/v1/images/generations" @@ -3678,8 +4191,8 @@ }, "azure/low/1536-x-1024/gpt-image-1": { "mode": "image_generation", - "input_cost_per_pixel": 1.0172526e-08, - "output_cost_per_pixel": 0.0, + "input_cost_per_pixel": 
1.0172526e-8, + "output_cost_per_pixel": 0, "litellm_provider": "azure", "supported_endpoints": [ "/v1/images/generations" @@ -3687,8 +4200,8 @@ }, "azure/medium/1536-x-1024/gpt-image-1": { "mode": "image_generation", - "input_cost_per_pixel": 4.0054321e-08, - "output_cost_per_pixel": 0.0, + "input_cost_per_pixel": 4.0054321e-8, + "output_cost_per_pixel": 0, "litellm_provider": "azure", "supported_endpoints": [ "/v1/images/generations" @@ -3696,52 +4209,52 @@ }, "azure/high/1536-x-1024/gpt-image-1": { "mode": "image_generation", - "input_cost_per_pixel": 1.58945719e-07, - "output_cost_per_pixel": 0.0, + "input_cost_per_pixel": 1.58945719e-7, + "output_cost_per_pixel": 0, "litellm_provider": "azure", "supported_endpoints": [ "/v1/images/generations" ] }, "azure/standard/1024-x-1024/dall-e-3": { - "input_cost_per_pixel": 3.81469e-08, - "output_cost_per_token": 0.0, + "input_cost_per_pixel": 3.81469e-8, + "output_cost_per_token": 0, "litellm_provider": "azure", "mode": "image_generation" }, "azure/hd/1024-x-1024/dall-e-3": { - "input_cost_per_pixel": 7.629e-08, - "output_cost_per_token": 0.0, + "input_cost_per_pixel": 7.629e-8, + "output_cost_per_token": 0, "litellm_provider": "azure", "mode": "image_generation" }, "azure/standard/1024-x-1792/dall-e-3": { - "input_cost_per_pixel": 4.359e-08, - "output_cost_per_token": 0.0, + "input_cost_per_pixel": 4.359e-8, + "output_cost_per_token": 0, "litellm_provider": "azure", "mode": "image_generation" }, "azure/standard/1792-x-1024/dall-e-3": { - "input_cost_per_pixel": 4.359e-08, - "output_cost_per_token": 0.0, + "input_cost_per_pixel": 4.359e-8, + "output_cost_per_token": 0, "litellm_provider": "azure", "mode": "image_generation" }, "azure/hd/1024-x-1792/dall-e-3": { - "input_cost_per_pixel": 6.539e-08, - "output_cost_per_token": 0.0, + "input_cost_per_pixel": 6.539e-8, + "output_cost_per_token": 0, "litellm_provider": "azure", "mode": "image_generation" }, "azure/hd/1792-x-1024/dall-e-3": { - "input_cost_per_pixel": 
6.539e-08, - "output_cost_per_token": 0.0, + "input_cost_per_pixel": 6.539e-8, + "output_cost_per_token": 0, "litellm_provider": "azure", "mode": "image_generation" }, "azure/standard/1024-x-1024/dall-e-2": { - "input_cost_per_pixel": 0.0, - "output_cost_per_token": 0.0, + "input_cost_per_pixel": 0, + "output_cost_per_token": 0, "litellm_provider": "azure", "mode": "image_generation" }, @@ -3749,8 +4262,8 @@ "max_tokens": 131072, "max_input_tokens": 131072, "max_output_tokens": 131072, - "input_cost_per_token": 3.3e-06, - "output_cost_per_token": 1.65e-05, + "input_cost_per_token": 0.0000033, + "output_cost_per_token": 0.0000165, "litellm_provider": "azure_ai", "mode": "chat", "supports_function_calling": true, @@ -3763,8 +4276,8 @@ "max_tokens": 131072, "max_input_tokens": 131072, "max_output_tokens": 131072, - "input_cost_per_token": 3e-06, - "output_cost_per_token": 1.5e-05, + "input_cost_per_token": 0.000003, + "output_cost_per_token": 0.000015, "litellm_provider": "azure_ai", "mode": "chat", "supports_function_calling": true, @@ -3777,8 +4290,8 @@ "max_tokens": 131072, "max_input_tokens": 131072, "max_output_tokens": 131072, - "input_cost_per_token": 2.5e-07, - "output_cost_per_token": 1.27e-06, + "input_cost_per_token": 2.5e-7, + "output_cost_per_token": 0.00000127, "litellm_provider": "azure_ai", "mode": "chat", "supports_function_calling": true, @@ -3792,8 +4305,8 @@ "max_tokens": 131072, "max_input_tokens": 131072, "max_output_tokens": 131072, - "input_cost_per_token": 2.75e-07, - "output_cost_per_token": 1.38e-06, + "input_cost_per_token": 2.75e-7, + "output_cost_per_token": 0.00000138, "litellm_provider": "azure_ai", "mode": "chat", "supports_function_calling": true, @@ -3807,8 +4320,8 @@ "max_tokens": 8192, "max_input_tokens": 128000, "max_output_tokens": 8192, - "input_cost_per_token": 1.35e-06, - "output_cost_per_token": 5.4e-06, + "input_cost_per_token": 0.00000135, + "output_cost_per_token": 0.0000054, "litellm_provider": "azure_ai", "mode": "chat", 
"supports_tool_choice": true, @@ -3819,8 +4332,8 @@ "max_tokens": 8192, "max_input_tokens": 128000, "max_output_tokens": 8192, - "input_cost_per_token": 1.14e-06, - "output_cost_per_token": 4.56e-06, + "input_cost_per_token": 0.00000114, + "output_cost_per_token": 0.00000456, "litellm_provider": "azure_ai", "mode": "chat", "supports_tool_choice": true, @@ -3830,8 +4343,8 @@ "max_tokens": 8192, "max_input_tokens": 128000, "max_output_tokens": 8192, - "input_cost_per_token": 1.14e-06, - "output_cost_per_token": 4.56e-06, + "input_cost_per_token": 0.00000114, + "output_cost_per_token": 0.00000456, "litellm_provider": "azure_ai", "mode": "chat", "supports_function_calling": true, @@ -3842,8 +4355,8 @@ "max_tokens": 4096, "max_input_tokens": 70000, "max_output_tokens": 4096, - "input_cost_per_token": 5e-07, - "output_cost_per_token": 7e-07, + "input_cost_per_token": 5e-7, + "output_cost_per_token": 7e-7, "litellm_provider": "azure_ai", "mode": "chat", "supports_tool_choice": true @@ -3862,8 +4375,8 @@ "max_tokens": 4096, "max_input_tokens": 131072, "max_output_tokens": 4096, - "input_cost_per_token": 1.5e-07, - "output_cost_per_token": 1.5e-07, + "input_cost_per_token": 1.5e-7, + "output_cost_per_token": 1.5e-7, "litellm_provider": "azure_ai", "mode": "chat", "supports_function_calling": true, @@ -3873,8 +4386,8 @@ "max_tokens": 8191, "max_input_tokens": 131072, "max_output_tokens": 8191, - "input_cost_per_token": 4e-07, - "output_cost_per_token": 2e-06, + "input_cost_per_token": 4e-7, + "output_cost_per_token": 0.000002, "litellm_provider": "azure_ai", "mode": "chat", "supports_function_calling": true, @@ -3885,8 +4398,8 @@ "max_tokens": 8191, "max_input_tokens": 32000, "max_output_tokens": 8191, - "input_cost_per_token": 4e-06, - "output_cost_per_token": 1.2e-05, + "input_cost_per_token": 0.000004, + "output_cost_per_token": 0.000012, "litellm_provider": "azure_ai", "mode": "chat", "supports_function_calling": true, @@ -3896,8 +4409,8 @@ "max_tokens": 8191, 
"max_input_tokens": 32000, "max_output_tokens": 8191, - "input_cost_per_token": 1e-06, - "output_cost_per_token": 3e-06, + "input_cost_per_token": 0.000001, + "output_cost_per_token": 0.000003, "litellm_provider": "azure_ai", "supports_function_calling": true, "mode": "chat", @@ -3907,8 +4420,8 @@ "max_tokens": 128000, "max_input_tokens": 128000, "max_output_tokens": 128000, - "input_cost_per_token": 1e-06, - "output_cost_per_token": 3e-06, + "input_cost_per_token": 0.000001, + "output_cost_per_token": 0.000003, "litellm_provider": "azure_ai", "mode": "chat", "supports_function_calling": true, @@ -3919,8 +4432,8 @@ "max_tokens": 4096, "max_input_tokens": 128000, "max_output_tokens": 4096, - "input_cost_per_token": 2e-06, - "output_cost_per_token": 6e-06, + "input_cost_per_token": 0.000002, + "output_cost_per_token": 0.000006, "litellm_provider": "azure_ai", "supports_function_calling": true, "mode": "chat", @@ -3931,8 +4444,8 @@ "max_tokens": 4096, "max_input_tokens": 128000, "max_output_tokens": 4096, - "input_cost_per_token": 2e-06, - "output_cost_per_token": 6e-06, + "input_cost_per_token": 0.000002, + "output_cost_per_token": 0.000006, "litellm_provider": "azure_ai", "supports_function_calling": true, "mode": "chat", @@ -3943,8 +4456,8 @@ "max_tokens": 4096, "max_input_tokens": 128000, "max_output_tokens": 4096, - "input_cost_per_token": 4e-08, - "output_cost_per_token": 4e-08, + "input_cost_per_token": 4e-8, + "output_cost_per_token": 4e-8, "litellm_provider": "azure_ai", "supports_function_calling": true, "mode": "chat", @@ -3955,8 +4468,8 @@ "max_tokens": 2048, "max_input_tokens": 128000, "max_output_tokens": 2048, - "input_cost_per_token": 3.7e-07, - "output_cost_per_token": 3.7e-07, + "input_cost_per_token": 3.7e-7, + "output_cost_per_token": 3.7e-7, "litellm_provider": "azure_ai", "supports_function_calling": true, "supports_vision": true, @@ -3968,8 +4481,8 @@ "max_tokens": 2048, "max_input_tokens": 128000, "max_output_tokens": 2048, - 
"input_cost_per_token": 7.1e-07, - "output_cost_per_token": 7.1e-07, + "input_cost_per_token": 7.1e-7, + "output_cost_per_token": 7.1e-7, "litellm_provider": "azure_ai", "supports_function_calling": true, "mode": "chat", @@ -3980,8 +4493,8 @@ "max_tokens": 16384, "max_input_tokens": 10000000, "max_output_tokens": 16384, - "input_cost_per_token": 2e-07, - "output_cost_per_token": 7.8e-07, + "input_cost_per_token": 2e-7, + "output_cost_per_token": 7.8e-7, "litellm_provider": "azure_ai", "supports_function_calling": true, "supports_vision": true, @@ -3993,8 +4506,8 @@ "max_tokens": 16384, "max_input_tokens": 1000000, "max_output_tokens": 16384, - "input_cost_per_token": 1.41e-06, - "output_cost_per_token": 3.5e-07, + "input_cost_per_token": 0.00000141, + "output_cost_per_token": 3.5e-7, "litellm_provider": "azure_ai", "supports_function_calling": true, "supports_vision": true, @@ -4006,8 +4519,8 @@ "max_tokens": 2048, "max_input_tokens": 128000, "max_output_tokens": 2048, - "input_cost_per_token": 2.04e-06, - "output_cost_per_token": 2.04e-06, + "input_cost_per_token": 0.00000204, + "output_cost_per_token": 0.00000204, "litellm_provider": "azure_ai", "supports_function_calling": true, "supports_vision": true, @@ -4019,8 +4532,8 @@ "max_tokens": 2048, "max_input_tokens": 8192, "max_output_tokens": 2048, - "input_cost_per_token": 1.1e-06, - "output_cost_per_token": 3.7e-07, + "input_cost_per_token": 0.0000011, + "output_cost_per_token": 3.7e-7, "litellm_provider": "azure_ai", "mode": "chat", "supports_tool_choice": true @@ -4029,8 +4542,8 @@ "max_tokens": 2048, "max_input_tokens": 128000, "max_output_tokens": 2048, - "input_cost_per_token": 3e-07, - "output_cost_per_token": 6.1e-07, + "input_cost_per_token": 3e-7, + "output_cost_per_token": 6.1e-7, "litellm_provider": "azure_ai", "mode": "chat", "source": "https://azuremarketplace.microsoft.com/en-us/marketplace/apps/metagenai.meta-llama-3-1-8b-instruct-offer?tab=PlansAndPrice", @@ -4040,8 +4553,8 @@ "max_tokens": 2048, 
"max_input_tokens": 128000, "max_output_tokens": 2048, - "input_cost_per_token": 2.68e-06, - "output_cost_per_token": 3.54e-06, + "input_cost_per_token": 0.00000268, + "output_cost_per_token": 0.00000354, "litellm_provider": "azure_ai", "mode": "chat", "source": "https://azuremarketplace.microsoft.com/en-us/marketplace/apps/metagenai.meta-llama-3-1-70b-instruct-offer?tab=PlansAndPrice", @@ -4051,8 +4564,8 @@ "max_tokens": 2048, "max_input_tokens": 128000, "max_output_tokens": 2048, - "input_cost_per_token": 5.33e-06, - "output_cost_per_token": 1.6e-05, + "input_cost_per_token": 0.00000533, + "output_cost_per_token": 0.000016, "litellm_provider": "azure_ai", "mode": "chat", "source": "https://azuremarketplace.microsoft.com/en-us/marketplace/apps/metagenai.meta-llama-3-1-405b-instruct-offer?tab=PlansAndPrice", @@ -4062,8 +4575,8 @@ "max_tokens": 4096, "max_input_tokens": 131072, "max_output_tokens": 4096, - "input_cost_per_token": 7.5e-08, - "output_cost_per_token": 3e-07, + "input_cost_per_token": 7.5e-8, + "output_cost_per_token": 3e-7, "litellm_provider": "azure_ai", "mode": "chat", "supports_function_calling": true, @@ -4073,9 +4586,9 @@ "max_tokens": 4096, "max_input_tokens": 131072, "max_output_tokens": 4096, - "input_cost_per_token": 8e-08, - "input_cost_per_audio_token": 4e-06, - "output_cost_per_token": 3.2e-07, + "input_cost_per_token": 8e-8, + "input_cost_per_audio_token": 0.000004, + "output_cost_per_token": 3.2e-7, "litellm_provider": "azure_ai", "mode": "chat", "supports_audio_input": true, @@ -4087,8 +4600,8 @@ "max_tokens": 16384, "max_input_tokens": 16384, "max_output_tokens": 16384, - "input_cost_per_token": 1.25e-07, - "output_cost_per_token": 5e-07, + "input_cost_per_token": 1.25e-7, + "output_cost_per_token": 5e-7, "litellm_provider": "azure_ai", "mode": "chat", "supports_vision": false, @@ -4100,8 +4613,8 @@ "max_tokens": 4096, "max_input_tokens": 128000, "max_output_tokens": 4096, - "input_cost_per_token": 1.3e-07, - "output_cost_per_token": 
5.2e-07, + "input_cost_per_token": 1.3e-7, + "output_cost_per_token": 5.2e-7, "litellm_provider": "azure_ai", "mode": "chat", "supports_vision": false, @@ -4112,8 +4625,8 @@ "max_tokens": 4096, "max_input_tokens": 128000, "max_output_tokens": 4096, - "input_cost_per_token": 1.3e-07, - "output_cost_per_token": 5.2e-07, + "input_cost_per_token": 1.3e-7, + "output_cost_per_token": 5.2e-7, "litellm_provider": "azure_ai", "mode": "chat", "supports_vision": true, @@ -4124,8 +4637,8 @@ "max_tokens": 4096, "max_input_tokens": 128000, "max_output_tokens": 4096, - "input_cost_per_token": 1.6e-07, - "output_cost_per_token": 6.4e-07, + "input_cost_per_token": 1.6e-7, + "output_cost_per_token": 6.4e-7, "litellm_provider": "azure_ai", "mode": "chat", "supports_vision": false, @@ -4136,8 +4649,8 @@ "max_tokens": 4096, "max_input_tokens": 4096, "max_output_tokens": 4096, - "input_cost_per_token": 1.3e-07, - "output_cost_per_token": 5.2e-07, + "input_cost_per_token": 1.3e-7, + "output_cost_per_token": 5.2e-7, "litellm_provider": "azure_ai", "mode": "chat", "supports_vision": false, @@ -4148,8 +4661,8 @@ "max_tokens": 4096, "max_input_tokens": 128000, "max_output_tokens": 4096, - "input_cost_per_token": 1.3e-07, - "output_cost_per_token": 5.2e-07, + "input_cost_per_token": 1.3e-7, + "output_cost_per_token": 5.2e-7, "litellm_provider": "azure_ai", "mode": "chat", "supports_vision": false, @@ -4160,8 +4673,8 @@ "max_tokens": 4096, "max_input_tokens": 8192, "max_output_tokens": 4096, - "input_cost_per_token": 1.5e-07, - "output_cost_per_token": 6e-07, + "input_cost_per_token": 1.5e-7, + "output_cost_per_token": 6e-7, "litellm_provider": "azure_ai", "mode": "chat", "supports_vision": false, @@ -4172,8 +4685,8 @@ "max_tokens": 4096, "max_input_tokens": 128000, "max_output_tokens": 4096, - "input_cost_per_token": 1.5e-07, - "output_cost_per_token": 6e-07, + "input_cost_per_token": 1.5e-7, + "output_cost_per_token": 6e-7, "litellm_provider": "azure_ai", "mode": "chat", "supports_vision": 
false, @@ -4184,8 +4697,8 @@ "max_tokens": 4096, "max_input_tokens": 4096, "max_output_tokens": 4096, - "input_cost_per_token": 1.7e-07, - "output_cost_per_token": 6.8e-07, + "input_cost_per_token": 1.7e-7, + "output_cost_per_token": 6.8e-7, "litellm_provider": "azure_ai", "mode": "chat", "supports_vision": false, @@ -4196,8 +4709,8 @@ "max_tokens": 4096, "max_input_tokens": 128000, "max_output_tokens": 4096, - "input_cost_per_token": 1.7e-07, - "output_cost_per_token": 6.8e-07, + "input_cost_per_token": 1.7e-7, + "output_cost_per_token": 6.8e-7, "litellm_provider": "azure_ai", "mode": "chat", "supports_vision": false, @@ -4209,9 +4722,9 @@ "max_input_tokens": 4096, "max_output_tokens": 4096, "max_query_tokens": 2048, - "input_cost_per_token": 0.0, + "input_cost_per_token": 0, "input_cost_per_query": 0.002, - "output_cost_per_token": 0.0, + "output_cost_per_token": 0, "litellm_provider": "azure_ai", "mode": "rerank" }, @@ -4220,9 +4733,9 @@ "max_input_tokens": 4096, "max_output_tokens": 4096, "max_query_tokens": 2048, - "input_cost_per_token": 0.0, + "input_cost_per_token": 0, "input_cost_per_query": 0.002, - "output_cost_per_token": 0.0, + "output_cost_per_token": 0, "litellm_provider": "azure_ai", "mode": "rerank" }, @@ -4231,9 +4744,9 @@ "max_input_tokens": 4096, "max_output_tokens": 4096, "max_query_tokens": 2048, - "input_cost_per_token": 0.0, + "input_cost_per_token": 0, "input_cost_per_query": 0.002, - "output_cost_per_token": 0.0, + "output_cost_per_token": 0, "litellm_provider": "azure_ai", "mode": "rerank" }, @@ -4241,8 +4754,8 @@ "max_tokens": 512, "max_input_tokens": 512, "output_vector_size": 1024, - "input_cost_per_token": 1e-07, - "output_cost_per_token": 0.0, + "input_cost_per_token": 1e-7, + "output_cost_per_token": 0, "litellm_provider": "azure_ai", "mode": "embedding", "supports_embedding_image_input": true, @@ -4252,8 +4765,8 @@ "max_tokens": 512, "max_input_tokens": 512, "output_vector_size": 1024, - "input_cost_per_token": 1e-07, - 
"output_cost_per_token": 0.0, + "input_cost_per_token": 1e-7, + "output_cost_per_token": 0, "litellm_provider": "azure_ai", "mode": "embedding", "supports_embedding_image_input": true, @@ -4263,8 +4776,8 @@ "max_tokens": 128000, "max_input_tokens": 128000, "output_vector_size": 3072, - "input_cost_per_token": 1.2e-07, - "output_cost_per_token": 0.0, + "input_cost_per_token": 1.2e-7, + "output_cost_per_token": 0, "litellm_provider": "azure_ai", "mode": "embedding", "supports_embedding_image_input": true, @@ -4281,8 +4794,8 @@ "max_tokens": 16384, "max_input_tokens": 16384, "max_output_tokens": 4096, - "input_cost_per_token": 4e-07, - "output_cost_per_token": 4e-07, + "input_cost_per_token": 4e-7, + "output_cost_per_token": 4e-7, "litellm_provider": "text-completion-openai", "mode": "completion" }, @@ -4290,8 +4803,8 @@ "max_tokens": 16384, "max_input_tokens": 16384, "max_output_tokens": 4096, - "input_cost_per_token": 2e-06, - "output_cost_per_token": 2e-06, + "input_cost_per_token": 0.000002, + "output_cost_per_token": 0.000002, "litellm_provider": "text-completion-openai", "mode": "completion" }, @@ -4299,8 +4812,8 @@ "max_tokens": 4096, "max_input_tokens": 8192, "max_output_tokens": 4096, - "input_cost_per_token": 1.5e-06, - "output_cost_per_token": 2e-06, + "input_cost_per_token": 0.0000015, + "output_cost_per_token": 0.000002, "litellm_provider": "text-completion-openai", "mode": "completion" }, @@ -4308,8 +4821,8 @@ "max_tokens": 4097, "max_input_tokens": 8192, "max_output_tokens": 4097, - "input_cost_per_token": 1.5e-06, - "output_cost_per_token": 2e-06, + "input_cost_per_token": 0.0000015, + "output_cost_per_token": 0.000002, "litellm_provider": "text-completion-openai", "mode": "completion" }, @@ -4317,8 +4830,8 @@ "max_tokens": 8191, "max_input_tokens": 32000, "max_output_tokens": 8191, - "input_cost_per_token": 2.5e-07, - "output_cost_per_token": 2.5e-07, + "input_cost_per_token": 2.5e-7, + "output_cost_per_token": 2.5e-7, "litellm_provider": "mistral", 
"mode": "chat", "supports_assistant_prefill": true, @@ -4329,8 +4842,8 @@ "max_tokens": 8191, "max_input_tokens": 32000, "max_output_tokens": 8191, - "input_cost_per_token": 1e-07, - "output_cost_per_token": 3e-07, + "input_cost_per_token": 1e-7, + "output_cost_per_token": 3e-7, "litellm_provider": "mistral", "supports_function_calling": true, "mode": "chat", @@ -4342,8 +4855,8 @@ "max_tokens": 8191, "max_input_tokens": 32000, "max_output_tokens": 8191, - "input_cost_per_token": 1e-07, - "output_cost_per_token": 3e-07, + "input_cost_per_token": 1e-7, + "output_cost_per_token": 3e-7, "litellm_provider": "mistral", "supports_function_calling": true, "mode": "chat", @@ -4355,8 +4868,8 @@ "max_tokens": 8191, "max_input_tokens": 32000, "max_output_tokens": 8191, - "input_cost_per_token": 2.7e-06, - "output_cost_per_token": 8.1e-06, + "input_cost_per_token": 0.0000027, + "output_cost_per_token": 0.0000081, "litellm_provider": "mistral", "mode": "chat", "supports_assistant_prefill": true, @@ -4367,8 +4880,8 @@ "max_tokens": 8191, "max_input_tokens": 131072, "max_output_tokens": 8191, - "input_cost_per_token": 4e-07, - "output_cost_per_token": 2e-06, + "input_cost_per_token": 4e-7, + "output_cost_per_token": 0.000002, "litellm_provider": "mistral", "mode": "chat", "supports_function_calling": true, @@ -4380,8 +4893,8 @@ "max_tokens": 8191, "max_input_tokens": 131072, "max_output_tokens": 8191, - "input_cost_per_token": 4e-07, - "output_cost_per_token": 2e-06, + "input_cost_per_token": 4e-7, + "output_cost_per_token": 0.000002, "litellm_provider": "mistral", "mode": "chat", "supports_function_calling": true, @@ -4393,8 +4906,8 @@ "max_tokens": 8191, "max_input_tokens": 32000, "max_output_tokens": 8191, - "input_cost_per_token": 2.7e-06, - "output_cost_per_token": 8.1e-06, + "input_cost_per_token": 0.0000027, + "output_cost_per_token": 0.0000081, "litellm_provider": "mistral", "mode": "chat", "supports_assistant_prefill": true, @@ -4405,8 +4918,8 @@ "max_tokens": 128000, 
"max_input_tokens": 128000, "max_output_tokens": 128000, - "input_cost_per_token": 2e-06, - "output_cost_per_token": 6e-06, + "input_cost_per_token": 0.000002, + "output_cost_per_token": 0.000006, "litellm_provider": "mistral", "mode": "chat", "supports_function_calling": true, @@ -4418,8 +4931,8 @@ "max_tokens": 128000, "max_input_tokens": 128000, "max_output_tokens": 128000, - "input_cost_per_token": 2e-06, - "output_cost_per_token": 6e-06, + "input_cost_per_token": 0.000002, + "output_cost_per_token": 0.000006, "litellm_provider": "mistral", "mode": "chat", "supports_function_calling": true, @@ -4431,8 +4944,8 @@ "max_tokens": 8191, "max_input_tokens": 32000, "max_output_tokens": 8191, - "input_cost_per_token": 4e-06, - "output_cost_per_token": 1.2e-05, + "input_cost_per_token": 0.000004, + "output_cost_per_token": 0.000012, "litellm_provider": "mistral", "mode": "chat", "supports_function_calling": true, @@ -4444,8 +4957,8 @@ "max_tokens": 128000, "max_input_tokens": 128000, "max_output_tokens": 128000, - "input_cost_per_token": 3e-06, - "output_cost_per_token": 9e-06, + "input_cost_per_token": 0.000003, + "output_cost_per_token": 0.000009, "litellm_provider": "mistral", "mode": "chat", "supports_function_calling": true, @@ -4457,8 +4970,8 @@ "max_tokens": 128000, "max_input_tokens": 128000, "max_output_tokens": 128000, - "input_cost_per_token": 2e-06, - "output_cost_per_token": 6e-06, + "input_cost_per_token": 0.000002, + "output_cost_per_token": 0.000006, "litellm_provider": "mistral", "mode": "chat", "supports_function_calling": true, @@ -4471,8 +4984,8 @@ "max_tokens": 128000, "max_input_tokens": 128000, "max_output_tokens": 128000, - "input_cost_per_token": 2e-06, - "output_cost_per_token": 6e-06, + "input_cost_per_token": 0.000002, + "output_cost_per_token": 0.000006, "litellm_provider": "mistral", "mode": "chat", "supports_function_calling": true, @@ -4485,8 +4998,8 @@ "max_tokens": 128000, "max_input_tokens": 128000, "max_output_tokens": 128000, - 
"input_cost_per_token": 1.5e-07, - "output_cost_per_token": 1.5e-07, + "input_cost_per_token": 1.5e-7, + "output_cost_per_token": 1.5e-7, "litellm_provider": "mistral", "mode": "chat", "supports_function_calling": true, @@ -4499,8 +5012,8 @@ "max_tokens": 8191, "max_input_tokens": 32000, "max_output_tokens": 8191, - "input_cost_per_token": 2.5e-07, - "output_cost_per_token": 2.5e-07, + "input_cost_per_token": 2.5e-7, + "output_cost_per_token": 2.5e-7, "litellm_provider": "mistral", "mode": "chat", "supports_assistant_prefill": true, @@ -4511,8 +5024,8 @@ "max_tokens": 8191, "max_input_tokens": 32000, "max_output_tokens": 8191, - "input_cost_per_token": 7e-07, - "output_cost_per_token": 7e-07, + "input_cost_per_token": 7e-7, + "output_cost_per_token": 7e-7, "litellm_provider": "mistral", "mode": "chat", "supports_function_calling": true, @@ -4524,8 +5037,8 @@ "max_tokens": 8191, "max_input_tokens": 65336, "max_output_tokens": 8191, - "input_cost_per_token": 2e-06, - "output_cost_per_token": 6e-06, + "input_cost_per_token": 0.000002, + "output_cost_per_token": 0.000006, "litellm_provider": "mistral", "mode": "chat", "supports_function_calling": true, @@ -4537,8 +5050,8 @@ "max_tokens": 8191, "max_input_tokens": 32000, "max_output_tokens": 8191, - "input_cost_per_token": 1e-06, - "output_cost_per_token": 3e-06, + "input_cost_per_token": 0.000001, + "output_cost_per_token": 0.000003, "litellm_provider": "mistral", "mode": "chat", "supports_assistant_prefill": true, @@ -4549,8 +5062,8 @@ "max_tokens": 8191, "max_input_tokens": 32000, "max_output_tokens": 8191, - "input_cost_per_token": 1e-06, - "output_cost_per_token": 3e-06, + "input_cost_per_token": 0.000001, + "output_cost_per_token": 0.000003, "litellm_provider": "mistral", "mode": "chat", "supports_assistant_prefill": true, @@ -4561,8 +5074,8 @@ "max_tokens": 128000, "max_input_tokens": 128000, "max_output_tokens": 128000, - "input_cost_per_token": 3e-07, - "output_cost_per_token": 3e-07, + "input_cost_per_token": 
3e-7, + "output_cost_per_token": 3e-7, "litellm_provider": "mistral", "mode": "chat", "source": "https://mistral.ai/technology/", @@ -4574,8 +5087,8 @@ "max_tokens": 128000, "max_input_tokens": 128000, "max_output_tokens": 128000, - "input_cost_per_token": 3e-07, - "output_cost_per_token": 3e-07, + "input_cost_per_token": 3e-7, + "output_cost_per_token": 3e-7, "litellm_provider": "mistral", "mode": "chat", "source": "https://mistral.ai/technology/", @@ -4587,8 +5100,8 @@ "max_tokens": 256000, "max_input_tokens": 256000, "max_output_tokens": 256000, - "input_cost_per_token": 2.5e-07, - "output_cost_per_token": 2.5e-07, + "input_cost_per_token": 2.5e-7, + "output_cost_per_token": 2.5e-7, "litellm_provider": "mistral", "mode": "chat", "source": "https://mistral.ai/technology/", @@ -4599,8 +5112,8 @@ "max_tokens": 256000, "max_input_tokens": 256000, "max_output_tokens": 256000, - "input_cost_per_token": 2.5e-07, - "output_cost_per_token": 2.5e-07, + "input_cost_per_token": 2.5e-7, + "output_cost_per_token": 2.5e-7, "litellm_provider": "mistral", "mode": "chat", "source": "https://mistral.ai/technology/", @@ -4611,8 +5124,8 @@ "max_tokens": 128000, "max_input_tokens": 128000, "max_output_tokens": 128000, - "input_cost_per_token": 1e-07, - "output_cost_per_token": 3e-07, + "input_cost_per_token": 1e-7, + "output_cost_per_token": 3e-7, "litellm_provider": "mistral", "mode": "chat", "source": "https://mistral.ai/news/devstral", @@ -4625,8 +5138,8 @@ "max_tokens": 128000, "max_input_tokens": 128000, "max_output_tokens": 128000, - "input_cost_per_token": 1e-07, - "output_cost_per_token": 3e-07, + "input_cost_per_token": 1e-7, + "output_cost_per_token": 3e-7, "litellm_provider": "mistral", "mode": "chat", "source": "https://mistral.ai/news/devstral", @@ -4639,8 +5152,8 @@ "max_tokens": 128000, "max_input_tokens": 128000, "max_output_tokens": 128000, - "input_cost_per_token": 4e-07, - "output_cost_per_token": 2e-06, + "input_cost_per_token": 4e-7, + "output_cost_per_token": 
0.000002, "litellm_provider": "mistral", "mode": "chat", "source": "https://mistral.ai/news/devstral", @@ -4653,8 +5166,8 @@ "max_tokens": 40000, "max_input_tokens": 40000, "max_output_tokens": 40000, - "input_cost_per_token": 2e-06, - "output_cost_per_token": 5e-06, + "input_cost_per_token": 0.000002, + "output_cost_per_token": 0.000005, "litellm_provider": "mistral", "mode": "chat", "source": "https://mistral.ai/news/magistral", @@ -4668,8 +5181,8 @@ "max_tokens": 40000, "max_input_tokens": 40000, "max_output_tokens": 40000, - "input_cost_per_token": 2e-06, - "output_cost_per_token": 5e-06, + "input_cost_per_token": 0.000002, + "output_cost_per_token": 0.000005, "litellm_provider": "mistral", "mode": "chat", "source": "https://mistral.ai/news/magistral", @@ -4683,8 +5196,8 @@ "max_tokens": 40000, "max_input_tokens": 40000, "max_output_tokens": 40000, - "input_cost_per_token": 5e-07, - "output_cost_per_token": 1.5e-06, + "input_cost_per_token": 5e-7, + "output_cost_per_token": 0.0000015, "litellm_provider": "mistral", "mode": "chat", "source": "https://mistral.ai/pricing#api-pricing", @@ -4698,8 +5211,8 @@ "max_tokens": 40000, "max_input_tokens": 40000, "max_output_tokens": 40000, - "input_cost_per_token": 5e-07, - "output_cost_per_token": 1.5e-06, + "input_cost_per_token": 5e-7, + "output_cost_per_token": 0.0000015, "litellm_provider": "mistral", "mode": "chat", "source": "https://mistral.ai/pricing#api-pricing", @@ -4712,7 +5225,7 @@ "mistral/mistral-embed": { "max_tokens": 8192, "max_input_tokens": 8192, - "input_cost_per_token": 1e-07, + "input_cost_per_token": 1e-7, "litellm_provider": "mistral", "mode": "embedding" }, @@ -4720,9 +5233,9 @@ "max_tokens": 8192, "max_input_tokens": 65536, "max_output_tokens": 8192, - "input_cost_per_token": 5.5e-07, - "input_cost_per_token_cache_hit": 1.4e-07, - "output_cost_per_token": 2.19e-06, + "input_cost_per_token": 5.5e-7, + "input_cost_per_token_cache_hit": 1.4e-7, + "output_cost_per_token": 0.00000219, 
"litellm_provider": "deepseek", "mode": "chat", "supports_function_calling": true, @@ -4735,11 +5248,11 @@ "max_tokens": 8192, "max_input_tokens": 65536, "max_output_tokens": 8192, - "input_cost_per_token": 2.7e-07, - "input_cost_per_token_cache_hit": 7e-08, - "cache_read_input_token_cost": 7e-08, - "cache_creation_input_token_cost": 0.0, - "output_cost_per_token": 1.1e-06, + "input_cost_per_token": 2.7e-7, + "input_cost_per_token_cache_hit": 7e-8, + "cache_read_input_token_cost": 7e-8, + "cache_creation_input_token_cost": 0, + "output_cost_per_token": 0.0000011, "litellm_provider": "deepseek", "mode": "chat", "supports_function_calling": true, @@ -4751,9 +5264,9 @@ "max_tokens": 8192, "max_input_tokens": 65536, "max_output_tokens": 8192, - "input_cost_per_token": 5.5e-07, - "input_cost_per_token_cache_hit": 1.4e-07, - "output_cost_per_token": 2.19e-06, + "input_cost_per_token": 5.5e-7, + "input_cost_per_token_cache_hit": 1.4e-7, + "output_cost_per_token": 0.00000219, "litellm_provider": "deepseek", "mode": "chat", "supports_function_calling": true, @@ -4766,11 +5279,11 @@ "max_tokens": 8192, "max_input_tokens": 65536, "max_output_tokens": 8192, - "input_cost_per_token": 2.7e-07, - "input_cost_per_token_cache_hit": 7e-08, - "cache_read_input_token_cost": 7e-08, - "cache_creation_input_token_cost": 0.0, - "output_cost_per_token": 1.1e-06, + "input_cost_per_token": 2.7e-7, + "input_cost_per_token_cache_hit": 7e-8, + "cache_read_input_token_cost": 7e-8, + "cache_creation_input_token_cost": 0, + "output_cost_per_token": 0.0000011, "litellm_provider": "deepseek", "mode": "chat", "supports_function_calling": true, @@ -4782,8 +5295,8 @@ "max_tokens": 8191, "max_input_tokens": 32000, "max_output_tokens": 8191, - "input_cost_per_token": 0.0, - "output_cost_per_token": 0.0, + "input_cost_per_token": 0, + "output_cost_per_token": 0, "litellm_provider": "codestral", "mode": "chat", "source": "https://docs.mistral.ai/capabilities/code_generation/", @@ -4794,8 +5307,8 @@ 
"max_tokens": 8191, "max_input_tokens": 32000, "max_output_tokens": 8191, - "input_cost_per_token": 0.0, - "output_cost_per_token": 0.0, + "input_cost_per_token": 0, + "output_cost_per_token": 0, "litellm_provider": "codestral", "mode": "chat", "source": "https://docs.mistral.ai/capabilities/code_generation/", @@ -4806,8 +5319,8 @@ "max_tokens": 8191, "max_input_tokens": 32000, "max_output_tokens": 8191, - "input_cost_per_token": 0.0, - "output_cost_per_token": 0.0, + "input_cost_per_token": 0, + "output_cost_per_token": 0, "litellm_provider": "text-completion-codestral", "mode": "completion", "source": "https://docs.mistral.ai/capabilities/code_generation/" @@ -4816,8 +5329,8 @@ "max_tokens": 8191, "max_input_tokens": 32000, "max_output_tokens": 8191, - "input_cost_per_token": 0.0, - "output_cost_per_token": 0.0, + "input_cost_per_token": 0, + "output_cost_per_token": 0, "litellm_provider": "text-completion-codestral", "mode": "completion", "source": "https://docs.mistral.ai/capabilities/code_generation/" @@ -4826,8 +5339,8 @@ "max_tokens": 131072, "max_input_tokens": 131072, "max_output_tokens": 131072, - "input_cost_per_token": 5e-06, - "output_cost_per_token": 1.5e-05, + "input_cost_per_token": 0.000005, + "output_cost_per_token": 0.000015, "litellm_provider": "xai", "mode": "chat", "supports_function_calling": true, @@ -4839,9 +5352,9 @@ "max_tokens": 32768, "max_input_tokens": 32768, "max_output_tokens": 32768, - "input_cost_per_token": 2e-06, - "input_cost_per_image": 2e-06, - "output_cost_per_token": 1e-05, + "input_cost_per_token": 0.000002, + "input_cost_per_image": 0.000002, + "output_cost_per_token": 0.00001, "litellm_provider": "xai", "mode": "chat", "supports_function_calling": true, @@ -4853,9 +5366,9 @@ "max_tokens": 32768, "max_input_tokens": 32768, "max_output_tokens": 32768, - "input_cost_per_token": 2e-06, - "input_cost_per_image": 2e-06, - "output_cost_per_token": 1e-05, + "input_cost_per_token": 0.000002, + "input_cost_per_image": 0.000002, + 
"output_cost_per_token": 0.00001, "litellm_provider": "xai", "mode": "chat", "supports_function_calling": true, @@ -4867,9 +5380,9 @@ "max_tokens": 32768, "max_input_tokens": 32768, "max_output_tokens": 32768, - "input_cost_per_token": 2e-06, - "input_cost_per_image": 2e-06, - "output_cost_per_token": 1e-05, + "input_cost_per_token": 0.000002, + "input_cost_per_image": 0.000002, + "output_cost_per_token": 0.00001, "litellm_provider": "xai", "mode": "chat", "supports_function_calling": true, @@ -4881,8 +5394,8 @@ "max_tokens": 131072, "max_input_tokens": 131072, "max_output_tokens": 131072, - "input_cost_per_token": 3e-06, - "output_cost_per_token": 1.5e-05, + "input_cost_per_token": 0.000003, + "output_cost_per_token": 0.000015, "litellm_provider": "xai", "mode": "chat", "supports_function_calling": true, @@ -4895,8 +5408,8 @@ "max_tokens": 131072, "max_input_tokens": 131072, "max_output_tokens": 131072, - "input_cost_per_token": 3e-06, - "output_cost_per_token": 1.5e-05, + "input_cost_per_token": 0.000003, + "output_cost_per_token": 0.000015, "litellm_provider": "xai", "mode": "chat", "supports_function_calling": true, @@ -4909,8 +5422,8 @@ "max_tokens": 131072, "max_input_tokens": 131072, "max_output_tokens": 131072, - "input_cost_per_token": 3e-06, - "output_cost_per_token": 1.5e-05, + "input_cost_per_token": 0.000003, + "output_cost_per_token": 0.000015, "litellm_provider": "xai", "mode": "chat", "supports_function_calling": true, @@ -4923,8 +5436,8 @@ "max_tokens": 131072, "max_input_tokens": 131072, "max_output_tokens": 131072, - "input_cost_per_token": 5e-06, - "output_cost_per_token": 2.5e-05, + "input_cost_per_token": 0.000005, + "output_cost_per_token": 0.000025, "litellm_provider": "xai", "mode": "chat", "supports_function_calling": true, @@ -4937,8 +5450,8 @@ "max_tokens": 131072, "max_input_tokens": 131072, "max_output_tokens": 131072, - "input_cost_per_token": 5e-06, - "output_cost_per_token": 2.5e-05, + "input_cost_per_token": 0.000005, + 
"output_cost_per_token": 0.000025, "litellm_provider": "xai", "mode": "chat", "supports_function_calling": true, @@ -4951,8 +5464,8 @@ "max_tokens": 131072, "max_input_tokens": 131072, "max_output_tokens": 131072, - "input_cost_per_token": 3e-07, - "output_cost_per_token": 5e-07, + "input_cost_per_token": 3e-7, + "output_cost_per_token": 5e-7, "litellm_provider": "xai", "mode": "chat", "supports_function_calling": true, @@ -4966,8 +5479,8 @@ "max_tokens": 131072, "max_input_tokens": 131072, "max_output_tokens": 131072, - "input_cost_per_token": 3e-07, - "output_cost_per_token": 5e-07, + "input_cost_per_token": 3e-7, + "output_cost_per_token": 5e-7, "litellm_provider": "xai", "mode": "chat", "supports_function_calling": true, @@ -4981,8 +5494,8 @@ "max_tokens": 131072, "max_input_tokens": 131072, "max_output_tokens": 131072, - "input_cost_per_token": 6e-07, - "output_cost_per_token": 4e-06, + "input_cost_per_token": 6e-7, + "output_cost_per_token": 0.000004, "litellm_provider": "xai", "mode": "chat", "supports_function_calling": true, @@ -4996,8 +5509,8 @@ "max_tokens": 131072, "max_input_tokens": 131072, "max_output_tokens": 131072, - "input_cost_per_token": 6e-07, - "output_cost_per_token": 4e-06, + "input_cost_per_token": 6e-7, + "output_cost_per_token": 0.000004, "litellm_provider": "xai", "mode": "chat", "supports_reasoning": true, @@ -5011,8 +5524,8 @@ "max_tokens": 131072, "max_input_tokens": 131072, "max_output_tokens": 131072, - "input_cost_per_token": 3e-07, - "output_cost_per_token": 5e-07, + "input_cost_per_token": 3e-7, + "output_cost_per_token": 5e-7, "litellm_provider": "xai", "mode": "chat", "supports_function_calling": true, @@ -5026,8 +5539,8 @@ "max_tokens": 131072, "max_input_tokens": 131072, "max_output_tokens": 131072, - "input_cost_per_token": 6e-07, - "output_cost_per_token": 4e-06, + "input_cost_per_token": 6e-7, + "output_cost_per_token": 0.000004, "litellm_provider": "xai", "mode": "chat", "supports_function_calling": true, @@ -5041,9 
+5554,9 @@ "max_tokens": 8192, "max_input_tokens": 8192, "max_output_tokens": 8192, - "input_cost_per_token": 5e-06, - "input_cost_per_image": 5e-06, - "output_cost_per_token": 1.5e-05, + "input_cost_per_token": 0.000005, + "input_cost_per_image": 0.000005, + "output_cost_per_token": 0.000015, "litellm_provider": "xai", "mode": "chat", "supports_function_calling": true, @@ -5055,8 +5568,8 @@ "max_tokens": 131072, "max_input_tokens": 131072, "max_output_tokens": 131072, - "input_cost_per_token": 2e-06, - "output_cost_per_token": 1e-05, + "input_cost_per_token": 0.000002, + "output_cost_per_token": 0.00001, "litellm_provider": "xai", "mode": "chat", "supports_function_calling": true, @@ -5067,8 +5580,8 @@ "max_tokens": 131072, "max_input_tokens": 131072, "max_output_tokens": 131072, - "input_cost_per_token": 2e-06, - "output_cost_per_token": 1e-05, + "input_cost_per_token": 0.000002, + "output_cost_per_token": 0.00001, "litellm_provider": "xai", "mode": "chat", "supports_function_calling": true, @@ -5079,8 +5592,8 @@ "max_tokens": 131072, "max_input_tokens": 131072, "max_output_tokens": 131072, - "input_cost_per_token": 2e-06, - "output_cost_per_token": 1e-05, + "input_cost_per_token": 0.000002, + "output_cost_per_token": 0.00001, "litellm_provider": "xai", "mode": "chat", "supports_function_calling": true, @@ -5091,8 +5604,8 @@ "max_tokens": 256000, "max_input_tokens": 256000, "max_output_tokens": 256000, - "input_cost_per_token": 3e-06, - "output_cost_per_token": 1.5e-05, + "input_cost_per_token": 0.000003, + "output_cost_per_token": 0.000015, "litellm_provider": "xai", "mode": "chat", "supports_function_calling": true, @@ -5105,8 +5618,8 @@ "max_tokens": 256000, "max_input_tokens": 256000, "max_output_tokens": 256000, - "input_cost_per_token": 3e-06, - "output_cost_per_token": 1.5e-05, + "input_cost_per_token": 0.000003, + "output_cost_per_token": 0.000015, "litellm_provider": "xai", "mode": "chat", "supports_function_calling": true, @@ -5119,8 +5632,8 @@ 
"max_tokens": 256000, "max_input_tokens": 256000, "max_output_tokens": 256000, - "input_cost_per_token": 3e-06, - "output_cost_per_token": 1.5e-05, + "input_cost_per_token": 0.000003, + "output_cost_per_token": 0.000015, "litellm_provider": "xai", "mode": "chat", "supports_function_calling": true, @@ -5133,9 +5646,9 @@ "max_tokens": 4096, "max_input_tokens": 128000, "max_output_tokens": 4096, - "input_cost_per_token": 1.4e-07, - "input_cost_per_token_cache_hit": 1.4e-08, - "output_cost_per_token": 2.8e-07, + "input_cost_per_token": 1.4e-7, + "input_cost_per_token_cache_hit": 1.4e-8, + "output_cost_per_token": 2.8e-7, "litellm_provider": "deepseek", "mode": "chat", "supports_function_calling": true, @@ -5147,8 +5660,8 @@ "max_tokens": 128000, "max_input_tokens": 128000, "max_output_tokens": 128000, - "input_cost_per_token": 7.5e-07, - "output_cost_per_token": 9.9e-07, + "input_cost_per_token": 7.5e-7, + "output_cost_per_token": 9.9e-7, "litellm_provider": "groq", "mode": "chat", "supports_function_calling": true, @@ -5160,8 +5673,8 @@ "max_tokens": 32768, "max_input_tokens": 128000, "max_output_tokens": 32768, - "input_cost_per_token": 5.9e-07, - "output_cost_per_token": 7.9e-07, + "input_cost_per_token": 5.9e-7, + "output_cost_per_token": 7.9e-7, "litellm_provider": "groq", "mode": "chat", "supports_function_calling": true, @@ -5172,8 +5685,8 @@ "max_tokens": 8192, "max_input_tokens": 8192, "max_output_tokens": 8192, - "input_cost_per_token": 5.9e-07, - "output_cost_per_token": 9.9e-07, + "input_cost_per_token": 5.9e-7, + "output_cost_per_token": 9.9e-7, "litellm_provider": "groq", "mode": "chat", "supports_tool_choice": true, @@ -5183,8 +5696,8 @@ "max_tokens": 8192, "max_input_tokens": 8192, "max_output_tokens": 8192, - "input_cost_per_token": 2e-07, - "output_cost_per_token": 2e-07, + "input_cost_per_token": 2e-7, + "output_cost_per_token": 2e-7, "litellm_provider": "groq", "mode": "chat" }, @@ -5192,8 +5705,8 @@ "max_tokens": 4096, "max_input_tokens": 4096, 
"max_output_tokens": 4096, - "input_cost_per_token": 7e-07, - "output_cost_per_token": 8e-07, + "input_cost_per_token": 7e-7, + "output_cost_per_token": 8e-7, "litellm_provider": "groq", "mode": "chat", "supports_function_calling": true, @@ -5204,8 +5717,8 @@ "max_tokens": 8192, "max_input_tokens": 8192, "max_output_tokens": 8192, - "input_cost_per_token": 5e-08, - "output_cost_per_token": 8e-08, + "input_cost_per_token": 5e-8, + "output_cost_per_token": 8e-8, "litellm_provider": "groq", "mode": "chat", "supports_tool_choice": true @@ -5214,8 +5727,8 @@ "max_tokens": 8192, "max_input_tokens": 8192, "max_output_tokens": 8192, - "input_cost_per_token": 4e-08, - "output_cost_per_token": 4e-08, + "input_cost_per_token": 4e-8, + "output_cost_per_token": 4e-8, "litellm_provider": "groq", "mode": "chat", "supports_function_calling": true, @@ -5227,8 +5740,8 @@ "max_tokens": 8192, "max_input_tokens": 8192, "max_output_tokens": 8192, - "input_cost_per_token": 6e-08, - "output_cost_per_token": 6e-08, + "input_cost_per_token": 6e-8, + "output_cost_per_token": 6e-8, "litellm_provider": "groq", "mode": "chat", "supports_function_calling": true, @@ -5240,8 +5753,8 @@ "max_tokens": 8192, "max_input_tokens": 8192, "max_output_tokens": 8192, - "input_cost_per_token": 1.8e-07, - "output_cost_per_token": 1.8e-07, + "input_cost_per_token": 1.8e-7, + "output_cost_per_token": 1.8e-7, "litellm_provider": "groq", "mode": "chat", "supports_function_calling": true, @@ -5253,8 +5766,8 @@ "max_tokens": 8192, "max_input_tokens": 8192, "max_output_tokens": 8192, - "input_cost_per_token": 1.8e-07, - "output_cost_per_token": 1.8e-07, + "input_cost_per_token": 1.8e-7, + "output_cost_per_token": 1.8e-7, "litellm_provider": "groq", "mode": "chat", "supports_function_calling": true, @@ -5267,8 +5780,8 @@ "max_tokens": 8192, "max_input_tokens": 8192, "max_output_tokens": 8192, - "input_cost_per_token": 9e-07, - "output_cost_per_token": 9e-07, + "input_cost_per_token": 9e-7, + "output_cost_per_token": 
9e-7, "litellm_provider": "groq", "mode": "chat", "supports_function_calling": true, @@ -5280,8 +5793,8 @@ "max_tokens": 8192, "max_input_tokens": 8192, "max_output_tokens": 8192, - "input_cost_per_token": 9e-07, - "output_cost_per_token": 9e-07, + "input_cost_per_token": 9e-7, + "output_cost_per_token": 9e-7, "litellm_provider": "groq", "mode": "chat", "supports_function_calling": true, @@ -5294,8 +5807,8 @@ "max_tokens": 8192, "max_input_tokens": 8192, "max_output_tokens": 8192, - "input_cost_per_token": 5.9e-07, - "output_cost_per_token": 7.9e-07, + "input_cost_per_token": 5.9e-7, + "output_cost_per_token": 7.9e-7, "litellm_provider": "groq", "mode": "chat", "supports_response_schema": true, @@ -5305,8 +5818,8 @@ "max_tokens": 8192, "max_input_tokens": 128000, "max_output_tokens": 8192, - "input_cost_per_token": 5e-08, - "output_cost_per_token": 8e-08, + "input_cost_per_token": 5e-8, + "output_cost_per_token": 8e-8, "litellm_provider": "groq", "mode": "chat", "supports_function_calling": true, @@ -5317,8 +5830,8 @@ "max_tokens": 8192, "max_input_tokens": 8192, "max_output_tokens": 8192, - "input_cost_per_token": 5.9e-07, - "output_cost_per_token": 7.9e-07, + "input_cost_per_token": 5.9e-7, + "output_cost_per_token": 7.9e-7, "litellm_provider": "groq", "mode": "chat", "supports_function_calling": true, @@ -5330,8 +5843,8 @@ "max_tokens": 8192, "max_input_tokens": 8192, "max_output_tokens": 8192, - "input_cost_per_token": 5.9e-07, - "output_cost_per_token": 7.9e-07, + "input_cost_per_token": 5.9e-7, + "output_cost_per_token": 7.9e-7, "litellm_provider": "groq", "mode": "chat", "supports_function_calling": true, @@ -5342,8 +5855,8 @@ "max_tokens": 8192, "max_input_tokens": 131072, "max_output_tokens": 8192, - "input_cost_per_token": 1.1e-07, - "output_cost_per_token": 3.4e-07, + "input_cost_per_token": 1.1e-7, + "output_cost_per_token": 3.4e-7, "litellm_provider": "groq", "mode": "chat", "supports_function_calling": true, @@ -5354,8 +5867,8 @@ "max_tokens": 8192, 
"max_input_tokens": 131072, "max_output_tokens": 8192, - "input_cost_per_token": 2e-07, - "output_cost_per_token": 6e-07, + "input_cost_per_token": 2e-7, + "output_cost_per_token": 6e-7, "litellm_provider": "groq", "mode": "chat", "supports_function_calling": true, @@ -5366,8 +5879,8 @@ "max_tokens": 32000, "max_input_tokens": 32000, "max_output_tokens": 32000, - "input_cost_per_token": 7.9e-07, - "output_cost_per_token": 7.9e-07, + "input_cost_per_token": 7.9e-7, + "output_cost_per_token": 7.9e-7, "litellm_provider": "groq", "mode": "chat" }, @@ -5375,8 +5888,8 @@ "max_tokens": 32768, "max_input_tokens": 32768, "max_output_tokens": 32768, - "input_cost_per_token": 2.4e-07, - "output_cost_per_token": 2.4e-07, + "input_cost_per_token": 2.4e-7, + "output_cost_per_token": 2.4e-7, "litellm_provider": "groq", "mode": "chat", "supports_function_calling": true, @@ -5388,8 +5901,8 @@ "max_tokens": 8192, "max_input_tokens": 8192, "max_output_tokens": 8192, - "input_cost_per_token": 7e-08, - "output_cost_per_token": 7e-08, + "input_cost_per_token": 7e-8, + "output_cost_per_token": 7e-8, "litellm_provider": "groq", "mode": "chat", "supports_function_calling": true, @@ -5401,8 +5914,8 @@ "max_tokens": 8192, "max_input_tokens": 8192, "max_output_tokens": 8192, - "input_cost_per_token": 2e-07, - "output_cost_per_token": 2e-07, + "input_cost_per_token": 2e-7, + "output_cost_per_token": 2e-7, "litellm_provider": "groq", "mode": "chat", "supports_function_calling": false, @@ -5413,8 +5926,8 @@ "max_tokens": 8192, "max_input_tokens": 8192, "max_output_tokens": 8192, - "input_cost_per_token": 8.9e-07, - "output_cost_per_token": 8.9e-07, + "input_cost_per_token": 8.9e-7, + "output_cost_per_token": 8.9e-7, "litellm_provider": "groq", "mode": "chat", "supports_function_calling": true, @@ -5426,8 +5939,8 @@ "max_tokens": 8192, "max_input_tokens": 8192, "max_output_tokens": 8192, - "input_cost_per_token": 1.9e-07, - "output_cost_per_token": 1.9e-07, + "input_cost_per_token": 1.9e-7, + 
"output_cost_per_token": 1.9e-7, "litellm_provider": "groq", "mode": "chat", "supports_function_calling": true, @@ -5439,8 +5952,8 @@ "max_tokens": 131000, "max_input_tokens": 131000, "max_output_tokens": 131000, - "input_cost_per_token": 2.9e-07, - "output_cost_per_token": 5.9e-07, + "input_cost_per_token": 2.9e-7, + "output_cost_per_token": 5.9e-7, "litellm_provider": "groq", "mode": "chat", "supports_function_calling": true, @@ -5452,8 +5965,8 @@ "max_tokens": 131072, "max_input_tokens": 131072, "max_output_tokens": 16384, - "input_cost_per_token": 1e-06, - "output_cost_per_token": 3e-06, + "input_cost_per_token": 0.000001, + "output_cost_per_token": 0.000003, "litellm_provider": "groq", "mode": "chat", "supports_function_calling": true, @@ -5464,34 +5977,64 @@ "max_tokens": 10000, "max_input_tokens": 10000, "max_output_tokens": 10000, - "input_cost_per_character": 5e-05, + "input_cost_per_character": 0.00005, "litellm_provider": "groq", "mode": "audio_speech" }, "groq/whisper-large-v3": { - "input_cost_per_second": 3.083e-05, - "output_cost_per_second": 0.0, + "input_cost_per_second": 0.00003083, + "output_cost_per_second": 0, "litellm_provider": "groq", "mode": "audio_transcription" }, "groq/whisper-large-v3-turbo": { - "input_cost_per_second": 1.111e-05, - "output_cost_per_second": 0.0, + "input_cost_per_second": 0.00001111, + "output_cost_per_second": 0, "litellm_provider": "groq", "mode": "audio_transcription" }, "groq/distil-whisper-large-v3-en": { - "input_cost_per_second": 5.56e-06, - "output_cost_per_second": 0.0, + "input_cost_per_second": 0.00000556, + "output_cost_per_second": 0, "litellm_provider": "groq", "mode": "audio_transcription" }, + "groq/openai/gpt-oss-20b": { + "max_tokens": 32768, + "max_input_tokens": 131072, + "max_output_tokens": 32768, + "input_cost_per_token": 1e-7, + "output_cost_per_token": 5e-7, + "litellm_provider": "groq", + "mode": "chat", + "supports_function_calling": true, + "supports_parallel_function_calling": true, + 
"supports_response_schema": true, + "supports_reasoning": true, + "supports_tool_choice": true, + "supports_web_search": true + }, + "groq/openai/gpt-oss-120b": { + "max_tokens": 32766, + "max_input_tokens": 131072, + "max_output_tokens": 32766, + "input_cost_per_token": 1.5e-7, + "output_cost_per_token": 7.5e-7, + "litellm_provider": "groq", + "mode": "chat", + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_response_schema": true, + "supports_reasoning": true, + "supports_tool_choice": true, + "supports_web_search": true + }, "cerebras/llama3.1-8b": { "max_tokens": 128000, "max_input_tokens": 128000, "max_output_tokens": 128000, - "input_cost_per_token": 1e-07, - "output_cost_per_token": 1e-07, + "input_cost_per_token": 1e-7, + "output_cost_per_token": 1e-7, "litellm_provider": "cerebras", "mode": "chat", "supports_function_calling": true, @@ -5501,8 +6044,8 @@ "max_tokens": 128000, "max_input_tokens": 128000, "max_output_tokens": 128000, - "input_cost_per_token": 6e-07, - "output_cost_per_token": 6e-07, + "input_cost_per_token": 6e-7, + "output_cost_per_token": 6e-7, "litellm_provider": "cerebras", "mode": "chat", "supports_function_calling": true, @@ -5512,8 +6055,8 @@ "max_tokens": 128000, "max_input_tokens": 128000, "max_output_tokens": 128000, - "input_cost_per_token": 8.5e-07, - "output_cost_per_token": 1.2e-06, + "input_cost_per_token": 8.5e-7, + "output_cost_per_token": 0.0000012, "litellm_provider": "cerebras", "mode": "chat", "supports_function_calling": true, @@ -5523,20 +6066,50 @@ "max_tokens": 128000, "max_input_tokens": 128000, "max_output_tokens": 128000, - "input_cost_per_token": 4e-07, - "output_cost_per_token": 8e-07, + "input_cost_per_token": 4e-7, + "output_cost_per_token": 8e-7, "litellm_provider": "cerebras", "mode": "chat", "supports_function_calling": true, "supports_tool_choice": true, "source": "https://inference-docs.cerebras.ai/support/pricing" }, + "cerebras/openai/gpt-oss-20b": { + 
"max_tokens": 32768, + "max_input_tokens": 131072, + "max_output_tokens": 32768, + "input_cost_per_token": 7e-8, + "output_cost_per_token": 3e-7, + "litellm_provider": "cerebras", + "mode": "chat", + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_response_schema": true, + "supports_reasoning": true, + "supports_tool_choice": true, + "source": "https://inference-docs.cerebras.ai/support/pricing" + }, + "cerebras/openai/gpt-oss-120b": { + "max_tokens": 32768, + "max_input_tokens": 131072, + "max_output_tokens": 32768, + "input_cost_per_token": 2.5e-7, + "output_cost_per_token": 6.9e-7, + "litellm_provider": "cerebras", + "mode": "chat", + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_response_schema": true, + "supports_reasoning": true, + "supports_tool_choice": true, + "source": "https://www.cerebras.ai/blog/openai-gpt-oss-120b-runs-fastest-on-cerebras" + }, "friendliai/meta-llama-3.1-8b-instruct": { "max_tokens": 8192, "max_input_tokens": 8192, "max_output_tokens": 8192, - "input_cost_per_token": 1e-07, - "output_cost_per_token": 1e-07, + "input_cost_per_token": 1e-7, + "output_cost_per_token": 1e-7, "litellm_provider": "friendliai", "mode": "chat", "supports_function_calling": true, @@ -5549,8 +6122,8 @@ "max_tokens": 8192, "max_input_tokens": 8192, "max_output_tokens": 8192, - "input_cost_per_token": 6e-07, - "output_cost_per_token": 6e-07, + "input_cost_per_token": 6e-7, + "output_cost_per_token": 6e-7, "litellm_provider": "friendliai", "mode": "chat", "supports_function_calling": true, @@ -5563,10 +6136,10 @@ "max_tokens": 4096, "max_input_tokens": 200000, "max_output_tokens": 4096, - "input_cost_per_token": 2.5e-07, - "output_cost_per_token": 1.25e-06, - "cache_creation_input_token_cost": 3e-07, - "cache_read_input_token_cost": 3e-08, + "input_cost_per_token": 2.5e-7, + "output_cost_per_token": 0.00000125, + "cache_creation_input_token_cost": 3e-7, + 
"cache_read_input_token_cost": 3e-8, "litellm_provider": "anthropic", "mode": "chat", "supports_function_calling": true, @@ -5582,10 +6155,10 @@ "max_tokens": 8192, "max_input_tokens": 200000, "max_output_tokens": 8192, - "input_cost_per_token": 8e-07, - "output_cost_per_token": 4e-06, - "cache_creation_input_token_cost": 1e-06, - "cache_read_input_token_cost": 8e-08, + "input_cost_per_token": 8e-7, + "output_cost_per_token": 0.000004, + "cache_creation_input_token_cost": 0.000001, + "cache_read_input_token_cost": 8e-8, "search_context_cost_per_query": { "search_context_size_low": 0.01, "search_context_size_medium": 0.01, @@ -5608,10 +6181,10 @@ "max_tokens": 8192, "max_input_tokens": 200000, "max_output_tokens": 8192, - "input_cost_per_token": 1e-06, - "output_cost_per_token": 5e-06, - "cache_creation_input_token_cost": 1.25e-06, - "cache_read_input_token_cost": 1e-07, + "input_cost_per_token": 0.000001, + "output_cost_per_token": 0.000005, + "cache_creation_input_token_cost": 0.00000125, + "cache_read_input_token_cost": 1e-7, "search_context_cost_per_query": { "search_context_size_low": 0.01, "search_context_size_medium": 0.01, @@ -5634,10 +6207,10 @@ "max_tokens": 4096, "max_input_tokens": 200000, "max_output_tokens": 4096, - "input_cost_per_token": 1.5e-05, - "output_cost_per_token": 7.5e-05, - "cache_creation_input_token_cost": 1.875e-05, - "cache_read_input_token_cost": 1.5e-06, + "input_cost_per_token": 0.000015, + "output_cost_per_token": 0.000075, + "cache_creation_input_token_cost": 0.00001875, + "cache_read_input_token_cost": 0.0000015, "litellm_provider": "anthropic", "mode": "chat", "supports_function_calling": true, @@ -5653,10 +6226,10 @@ "max_tokens": 4096, "max_input_tokens": 200000, "max_output_tokens": 4096, - "input_cost_per_token": 1.5e-05, - "output_cost_per_token": 7.5e-05, - "cache_creation_input_token_cost": 1.875e-05, - "cache_read_input_token_cost": 1.5e-06, + "input_cost_per_token": 0.000015, + "output_cost_per_token": 0.000075, + 
"cache_creation_input_token_cost": 0.00001875, + "cache_read_input_token_cost": 0.0000015, "litellm_provider": "anthropic", "mode": "chat", "supports_function_calling": true, @@ -5673,10 +6246,10 @@ "max_tokens": 8192, "max_input_tokens": 200000, "max_output_tokens": 8192, - "input_cost_per_token": 3e-06, - "output_cost_per_token": 1.5e-05, - "cache_creation_input_token_cost": 3.75e-06, - "cache_read_input_token_cost": 3e-07, + "input_cost_per_token": 0.000003, + "output_cost_per_token": 0.000015, + "cache_creation_input_token_cost": 0.00000375, + "cache_read_input_token_cost": 3e-7, "search_context_cost_per_query": { "search_context_size_low": 0.01, "search_context_size_medium": 0.01, @@ -5699,10 +6272,10 @@ "max_tokens": 8192, "max_input_tokens": 200000, "max_output_tokens": 8192, - "input_cost_per_token": 3e-06, - "output_cost_per_token": 1.5e-05, - "cache_creation_input_token_cost": 3.75e-06, - "cache_read_input_token_cost": 3e-07, + "input_cost_per_token": 0.000003, + "output_cost_per_token": 0.000015, + "cache_creation_input_token_cost": 0.00000375, + "cache_read_input_token_cost": 3e-7, "litellm_provider": "anthropic", "mode": "chat", "supports_function_calling": true, @@ -5719,15 +6292,41 @@ "max_tokens": 32000, "max_input_tokens": 200000, "max_output_tokens": 32000, - "input_cost_per_token": 1.5e-05, - "output_cost_per_token": 7.5e-05, + "input_cost_per_token": 0.000015, + "output_cost_per_token": 0.000075, "search_context_cost_per_query": { "search_context_size_low": 0.01, "search_context_size_medium": 0.01, "search_context_size_high": 0.01 }, - "cache_creation_input_token_cost": 1.875e-05, - "cache_read_input_token_cost": 1.5e-06, + "cache_creation_input_token_cost": 0.00001875, + "cache_read_input_token_cost": 0.0000015, + "litellm_provider": "anthropic", + "mode": "chat", + "supports_function_calling": true, + "supports_vision": true, + "tool_use_system_prompt_tokens": 159, + "supports_assistant_prefill": true, + "supports_pdf_input": true, + 
"supports_prompt_caching": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_reasoning": true, + "supports_computer_use": true + }, + "claude-opus-4-1": { + "max_tokens": 32000, + "max_input_tokens": 200000, + "max_output_tokens": 32000, + "input_cost_per_token": 0.000015, + "output_cost_per_token": 0.000075, + "search_context_cost_per_query": { + "search_context_size_low": 0.01, + "search_context_size_medium": 0.01, + "search_context_size_high": 0.01 + }, + "cache_creation_input_token_cost": 0.00001875, + "cache_read_input_token_cost": 0.0000015, "litellm_provider": "anthropic", "mode": "chat", "supports_function_calling": true, @@ -5745,15 +6344,15 @@ "max_tokens": 32000, "max_input_tokens": 200000, "max_output_tokens": 32000, - "input_cost_per_token": 1.5e-05, - "output_cost_per_token": 7.5e-05, + "input_cost_per_token": 0.000015, + "output_cost_per_token": 0.000075, "search_context_cost_per_query": { "search_context_size_low": 0.01, "search_context_size_medium": 0.01, "search_context_size_high": 0.01 }, - "cache_creation_input_token_cost": 1.875e-05, - "cache_read_input_token_cost": 1.5e-06, + "cache_creation_input_token_cost": 0.00001875, + "cache_read_input_token_cost": 0.0000015, "litellm_provider": "anthropic", "mode": "chat", "supports_function_calling": true, @@ -5771,15 +6370,15 @@ "max_tokens": 64000, "max_input_tokens": 200000, "max_output_tokens": 64000, - "input_cost_per_token": 3e-06, - "output_cost_per_token": 1.5e-05, + "input_cost_per_token": 0.000003, + "output_cost_per_token": 0.000015, "search_context_cost_per_query": { "search_context_size_low": 0.01, "search_context_size_medium": 0.01, "search_context_size_high": 0.01 }, - "cache_creation_input_token_cost": 3.75e-06, - "cache_read_input_token_cost": 3e-07, + "cache_creation_input_token_cost": 0.00000375, + "cache_read_input_token_cost": 3e-7, "litellm_provider": "anthropic", "mode": "chat", "supports_function_calling": true, @@ -5797,15 +6396,15 @@ 
"max_tokens": 32000, "max_input_tokens": 200000, "max_output_tokens": 32000, - "input_cost_per_token": 1.5e-05, - "output_cost_per_token": 7.5e-05, + "input_cost_per_token": 0.000015, + "output_cost_per_token": 0.000075, "search_context_cost_per_query": { "search_context_size_low": 0.01, "search_context_size_medium": 0.01, "search_context_size_high": 0.01 }, - "cache_creation_input_token_cost": 1.875e-05, - "cache_read_input_token_cost": 1.5e-06, + "cache_creation_input_token_cost": 0.00001875, + "cache_read_input_token_cost": 0.0000015, "litellm_provider": "anthropic", "mode": "chat", "supports_function_calling": true, @@ -5823,15 +6422,15 @@ "max_tokens": 64000, "max_input_tokens": 200000, "max_output_tokens": 64000, - "input_cost_per_token": 3e-06, - "output_cost_per_token": 1.5e-05, + "input_cost_per_token": 0.000003, + "output_cost_per_token": 0.000015, "search_context_cost_per_query": { "search_context_size_low": 0.01, "search_context_size_medium": 0.01, "search_context_size_high": 0.01 }, - "cache_creation_input_token_cost": 3.75e-06, - "cache_read_input_token_cost": 3e-07, + "cache_creation_input_token_cost": 0.00000375, + "cache_read_input_token_cost": 3e-7, "litellm_provider": "anthropic", "mode": "chat", "supports_function_calling": true, @@ -5850,15 +6449,15 @@ "max_tokens": 128000, "max_input_tokens": 200000, "max_output_tokens": 128000, - "input_cost_per_token": 3e-06, - "output_cost_per_token": 1.5e-05, + "input_cost_per_token": 0.000003, + "output_cost_per_token": 0.000015, "search_context_cost_per_query": { "search_context_size_low": 0.01, "search_context_size_medium": 0.01, "search_context_size_high": 0.01 }, - "cache_creation_input_token_cost": 3.75e-06, - "cache_read_input_token_cost": 3e-07, + "cache_creation_input_token_cost": 0.00000375, + "cache_read_input_token_cost": 3e-7, "litellm_provider": "anthropic", "mode": "chat", "supports_function_calling": true, @@ -5877,10 +6476,10 @@ "max_tokens": 128000, "max_input_tokens": 200000, 
"max_output_tokens": 128000, - "input_cost_per_token": 3e-06, - "output_cost_per_token": 1.5e-05, - "cache_creation_input_token_cost": 3.75e-06, - "cache_read_input_token_cost": 3e-07, + "input_cost_per_token": 0.000003, + "output_cost_per_token": 0.000015, + "cache_creation_input_token_cost": 0.00000375, + "cache_read_input_token_cost": 3e-7, "search_context_cost_per_query": { "search_context_size_low": 0.01, "search_context_size_medium": 0.01, @@ -5905,10 +6504,10 @@ "max_tokens": 8192, "max_input_tokens": 200000, "max_output_tokens": 8192, - "input_cost_per_token": 3e-06, - "output_cost_per_token": 1.5e-05, - "cache_creation_input_token_cost": 3.75e-06, - "cache_read_input_token_cost": 3e-07, + "input_cost_per_token": 0.000003, + "output_cost_per_token": 0.000015, + "cache_creation_input_token_cost": 0.00000375, + "cache_read_input_token_cost": 3e-7, "search_context_cost_per_query": { "search_context_size_low": 0.01, "search_context_size_medium": 0.01, @@ -5931,8 +6530,8 @@ "max_tokens": 2048, "max_input_tokens": 8192, "max_output_tokens": 2048, - "input_cost_per_character": 2.5e-07, - "output_cost_per_character": 5e-07, + "input_cost_per_character": 2.5e-7, + "output_cost_per_character": 5e-7, "litellm_provider": "vertex_ai-text-models", "mode": "completion", "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models" @@ -5941,8 +6540,8 @@ "max_tokens": 1024, "max_input_tokens": 8192, "max_output_tokens": 1024, - "input_cost_per_character": 2.5e-07, - "output_cost_per_character": 5e-07, + "input_cost_per_character": 2.5e-7, + "output_cost_per_character": 5e-7, "litellm_provider": "vertex_ai-text-models", "mode": "completion", "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models" @@ -5951,8 +6550,8 @@ "max_tokens": 1024, "max_input_tokens": 8192, "max_output_tokens": 1024, - "input_cost_per_character": 2.5e-07, - "output_cost_per_character": 5e-07, + "input_cost_per_character": 
2.5e-7, + "output_cost_per_character": 5e-7, "litellm_provider": "vertex_ai-text-models", "mode": "completion", "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models" @@ -5961,10 +6560,10 @@ "max_tokens": 1024, "max_input_tokens": 8192, "max_output_tokens": 1024, - "input_cost_per_token": 1.25e-07, - "output_cost_per_token": 1.25e-07, - "input_cost_per_character": 2.5e-07, - "output_cost_per_character": 5e-07, + "input_cost_per_token": 1.25e-7, + "output_cost_per_token": 1.25e-7, + "input_cost_per_character": 2.5e-7, + "output_cost_per_character": 5e-7, "litellm_provider": "vertex_ai-text-models", "mode": "completion", "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models" @@ -5973,10 +6572,10 @@ "max_tokens": 1024, "max_input_tokens": 8192, "max_output_tokens": 1024, - "input_cost_per_token": 1.25e-07, - "output_cost_per_token": 1.25e-07, - "input_cost_per_character": 2.5e-07, - "output_cost_per_character": 5e-07, + "input_cost_per_token": 1.25e-7, + "output_cost_per_token": 1.25e-7, + "input_cost_per_character": 2.5e-7, + "output_cost_per_character": 5e-7, "litellm_provider": "vertex_ai-text-models", "mode": "completion", "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models" @@ -5985,8 +6584,8 @@ "max_tokens": 1024, "max_input_tokens": 8192, "max_output_tokens": 1024, - "input_cost_per_token": 1e-05, - "output_cost_per_token": 2.8e-05, + "input_cost_per_token": 0.00001, + "output_cost_per_token": 0.000028, "litellm_provider": "vertex_ai-text-models", "mode": "completion", "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models" @@ -5995,8 +6594,8 @@ "max_tokens": 1024, "max_input_tokens": 8192, "max_output_tokens": 1024, - "input_cost_per_token": 1e-05, - "output_cost_per_token": 2.8e-05, + "input_cost_per_token": 0.00001, + "output_cost_per_token": 0.000028, "litellm_provider": 
"vertex_ai-text-models", "mode": "completion", "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models" @@ -6005,10 +6604,10 @@ "max_tokens": 4096, "max_input_tokens": 8192, "max_output_tokens": 4096, - "input_cost_per_token": 1.25e-07, - "output_cost_per_token": 1.25e-07, - "input_cost_per_character": 2.5e-07, - "output_cost_per_character": 5e-07, + "input_cost_per_token": 1.25e-7, + "output_cost_per_token": 1.25e-7, + "input_cost_per_character": 2.5e-7, + "output_cost_per_character": 5e-7, "litellm_provider": "vertex_ai-chat-models", "mode": "chat", "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models", @@ -6018,10 +6617,10 @@ "max_tokens": 4096, "max_input_tokens": 8192, "max_output_tokens": 4096, - "input_cost_per_token": 1.25e-07, - "output_cost_per_token": 1.25e-07, - "input_cost_per_character": 2.5e-07, - "output_cost_per_character": 5e-07, + "input_cost_per_token": 1.25e-7, + "output_cost_per_token": 1.25e-7, + "input_cost_per_character": 2.5e-7, + "output_cost_per_character": 5e-7, "litellm_provider": "vertex_ai-chat-models", "mode": "chat", "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models", @@ -6031,10 +6630,10 @@ "max_tokens": 4096, "max_input_tokens": 8192, "max_output_tokens": 4096, - "input_cost_per_token": 1.25e-07, - "output_cost_per_token": 1.25e-07, - "input_cost_per_character": 2.5e-07, - "output_cost_per_character": 5e-07, + "input_cost_per_token": 1.25e-7, + "output_cost_per_token": 1.25e-7, + "input_cost_per_character": 2.5e-7, + "output_cost_per_character": 5e-7, "litellm_provider": "vertex_ai-chat-models", "mode": "chat", "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models", @@ -6045,10 +6644,10 @@ "max_tokens": 8192, "max_input_tokens": 32000, "max_output_tokens": 8192, - "input_cost_per_token": 1.25e-07, - "output_cost_per_token": 1.25e-07, - 
"input_cost_per_character": 2.5e-07, - "output_cost_per_character": 5e-07, + "input_cost_per_token": 1.25e-7, + "output_cost_per_token": 1.25e-7, + "input_cost_per_character": 2.5e-7, + "output_cost_per_character": 5e-7, "litellm_provider": "vertex_ai-chat-models", "mode": "chat", "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models", @@ -6058,10 +6657,10 @@ "max_tokens": 8192, "max_input_tokens": 32000, "max_output_tokens": 8192, - "input_cost_per_token": 1.25e-07, - "output_cost_per_token": 1.25e-07, - "input_cost_per_character": 2.5e-07, - "output_cost_per_character": 5e-07, + "input_cost_per_token": 1.25e-7, + "output_cost_per_token": 1.25e-7, + "input_cost_per_character": 2.5e-7, + "output_cost_per_character": 5e-7, "litellm_provider": "vertex_ai-chat-models", "mode": "chat", "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models", @@ -6071,10 +6670,10 @@ "max_tokens": 1024, "max_input_tokens": 6144, "max_output_tokens": 1024, - "input_cost_per_token": 1.25e-07, - "output_cost_per_token": 1.25e-07, - "input_cost_per_character": 2.5e-07, - "output_cost_per_character": 5e-07, + "input_cost_per_token": 1.25e-7, + "output_cost_per_token": 1.25e-7, + "input_cost_per_character": 2.5e-7, + "output_cost_per_character": 5e-7, "litellm_provider": "vertex_ai-code-text-models", "mode": "chat", "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models", @@ -6084,10 +6683,10 @@ "max_tokens": 1024, "max_input_tokens": 6144, "max_output_tokens": 1024, - "input_cost_per_token": 1.25e-07, - "output_cost_per_token": 1.25e-07, - "input_cost_per_character": 2.5e-07, - "output_cost_per_character": 5e-07, + "input_cost_per_token": 1.25e-7, + "output_cost_per_token": 1.25e-7, + "input_cost_per_character": 2.5e-7, + "output_cost_per_character": 5e-7, "litellm_provider": "vertex_ai-code-text-models", "mode": "completion", "source": 
"https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models" @@ -6096,10 +6695,10 @@ "max_tokens": 1024, "max_input_tokens": 6144, "max_output_tokens": 1024, - "input_cost_per_token": 1.25e-07, - "output_cost_per_token": 1.25e-07, - "input_cost_per_character": 2.5e-07, - "output_cost_per_character": 5e-07, + "input_cost_per_token": 1.25e-7, + "output_cost_per_token": 1.25e-7, + "input_cost_per_character": 2.5e-7, + "output_cost_per_character": 5e-7, "litellm_provider": "vertex_ai-code-text-models", "mode": "completion", "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models" @@ -6108,10 +6707,10 @@ "max_tokens": 1024, "max_input_tokens": 6144, "max_output_tokens": 1024, - "input_cost_per_token": 1.25e-07, - "output_cost_per_token": 1.25e-07, - "input_cost_per_character": 2.5e-07, - "output_cost_per_character": 5e-07, + "input_cost_per_token": 1.25e-7, + "output_cost_per_token": 1.25e-7, + "input_cost_per_character": 2.5e-7, + "output_cost_per_character": 5e-7, "litellm_provider": "vertex_ai-code-text-models", "mode": "completion", "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models" @@ -6120,10 +6719,10 @@ "max_tokens": 1024, "max_input_tokens": 6144, "max_output_tokens": 1024, - "input_cost_per_token": 1.25e-07, - "output_cost_per_token": 1.25e-07, - "input_cost_per_character": 2.5e-07, - "output_cost_per_character": 5e-07, + "input_cost_per_token": 1.25e-7, + "output_cost_per_token": 1.25e-7, + "input_cost_per_character": 2.5e-7, + "output_cost_per_character": 5e-7, "litellm_provider": "vertex_ai-code-text-models", "mode": "completion", "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models" @@ -6132,8 +6731,8 @@ "max_tokens": 64, "max_input_tokens": 2048, "max_output_tokens": 64, - "input_cost_per_token": 1.25e-07, - "output_cost_per_token": 1.25e-07, + "input_cost_per_token": 1.25e-7, + "output_cost_per_token": 
1.25e-7, "litellm_provider": "vertex_ai-code-text-models", "mode": "completion", "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models" @@ -6142,8 +6741,8 @@ "max_tokens": 64, "max_input_tokens": 2048, "max_output_tokens": 64, - "input_cost_per_token": 1.25e-07, - "output_cost_per_token": 1.25e-07, + "input_cost_per_token": 1.25e-7, + "output_cost_per_token": 1.25e-7, "litellm_provider": "vertex_ai-code-text-models", "mode": "completion", "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models" @@ -6152,8 +6751,8 @@ "max_tokens": 64, "max_input_tokens": 2048, "max_output_tokens": 64, - "input_cost_per_token": 1.25e-07, - "output_cost_per_token": 1.25e-07, + "input_cost_per_token": 1.25e-7, + "output_cost_per_token": 1.25e-7, "litellm_provider": "vertex_ai-code-text-models", "mode": "completion", "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models" @@ -6162,8 +6761,8 @@ "max_tokens": 64, "max_input_tokens": 2048, "max_output_tokens": 64, - "input_cost_per_token": 1.25e-07, - "output_cost_per_token": 1.25e-07, + "input_cost_per_token": 1.25e-7, + "output_cost_per_token": 1.25e-7, "litellm_provider": "vertex_ai-code-text-models", "mode": "completion", "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models" @@ -6172,10 +6771,10 @@ "max_tokens": 1024, "max_input_tokens": 6144, "max_output_tokens": 1024, - "input_cost_per_token": 1.25e-07, - "output_cost_per_token": 1.25e-07, - "input_cost_per_character": 2.5e-07, - "output_cost_per_character": 5e-07, + "input_cost_per_token": 1.25e-7, + "output_cost_per_token": 1.25e-7, + "input_cost_per_character": 2.5e-7, + "output_cost_per_character": 5e-7, "litellm_provider": "vertex_ai-code-chat-models", "mode": "chat", "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models", @@ -6185,10 +6784,10 @@ "max_tokens": 1024, 
"max_input_tokens": 6144, "max_output_tokens": 1024, - "input_cost_per_token": 1.25e-07, - "output_cost_per_token": 1.25e-07, - "input_cost_per_character": 2.5e-07, - "output_cost_per_character": 5e-07, + "input_cost_per_token": 1.25e-7, + "output_cost_per_token": 1.25e-7, + "input_cost_per_character": 2.5e-7, + "output_cost_per_character": 5e-7, "litellm_provider": "vertex_ai-code-chat-models", "mode": "chat", "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models", @@ -6198,10 +6797,10 @@ "max_tokens": 1024, "max_input_tokens": 6144, "max_output_tokens": 1024, - "input_cost_per_token": 1.25e-07, - "output_cost_per_token": 1.25e-07, - "input_cost_per_character": 2.5e-07, - "output_cost_per_character": 5e-07, + "input_cost_per_token": 1.25e-7, + "output_cost_per_token": 1.25e-7, + "input_cost_per_character": 2.5e-7, + "output_cost_per_character": 5e-7, "litellm_provider": "vertex_ai-code-chat-models", "mode": "chat", "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models", @@ -6211,10 +6810,10 @@ "max_tokens": 1024, "max_input_tokens": 6144, "max_output_tokens": 1024, - "input_cost_per_token": 1.25e-07, - "output_cost_per_token": 1.25e-07, - "input_cost_per_character": 2.5e-07, - "output_cost_per_character": 5e-07, + "input_cost_per_token": 1.25e-7, + "output_cost_per_token": 1.25e-7, + "input_cost_per_character": 2.5e-7, + "output_cost_per_character": 5e-7, "litellm_provider": "vertex_ai-code-chat-models", "mode": "chat", "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models", @@ -6224,10 +6823,10 @@ "max_tokens": 8192, "max_input_tokens": 32000, "max_output_tokens": 8192, - "input_cost_per_token": 1.25e-07, - "output_cost_per_token": 1.25e-07, - "input_cost_per_character": 2.5e-07, - "output_cost_per_character": 5e-07, + "input_cost_per_token": 1.25e-7, + "output_cost_per_token": 1.25e-7, + "input_cost_per_character": 2.5e-7, + 
"output_cost_per_character": 5e-7, "litellm_provider": "vertex_ai-code-chat-models", "mode": "chat", "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models", @@ -6237,10 +6836,10 @@ "max_tokens": 8192, "max_input_tokens": 32000, "max_output_tokens": 8192, - "input_cost_per_token": 1.25e-07, - "output_cost_per_token": 1.25e-07, - "input_cost_per_character": 2.5e-07, - "output_cost_per_character": 5e-07, + "input_cost_per_token": 1.25e-7, + "output_cost_per_token": 1.25e-7, + "input_cost_per_character": 2.5e-7, + "output_cost_per_character": 5e-7, "litellm_provider": "vertex_ai-code-chat-models", "mode": "chat", "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models", @@ -6318,10 +6917,10 @@ "max_output_tokens": 8192, "input_cost_per_image": 0.0025, "input_cost_per_video_per_second": 0.002, - "input_cost_per_token": 5e-07, - "input_cost_per_character": 1.25e-07, - "output_cost_per_token": 1.5e-06, - "output_cost_per_character": 3.75e-07, + "input_cost_per_token": 5e-7, + "input_cost_per_character": 1.25e-7, + "output_cost_per_token": 0.0000015, + "output_cost_per_character": 3.75e-7, "litellm_provider": "vertex_ai-language-models", "mode": "chat", "supports_function_calling": true, @@ -6335,10 +6934,10 @@ "max_output_tokens": 8192, "input_cost_per_image": 0.0025, "input_cost_per_video_per_second": 0.002, - "input_cost_per_token": 5e-07, - "input_cost_per_character": 1.25e-07, - "output_cost_per_token": 1.5e-06, - "output_cost_per_character": 3.75e-07, + "input_cost_per_token": 5e-7, + "input_cost_per_character": 1.25e-7, + "output_cost_per_token": 0.0000015, + "output_cost_per_character": 3.75e-7, "litellm_provider": "vertex_ai-language-models", "mode": "chat", "supports_function_calling": true, @@ -6352,10 +6951,10 @@ "max_output_tokens": 8192, "input_cost_per_image": 0.0025, "input_cost_per_video_per_second": 0.002, - "input_cost_per_token": 5e-07, - "input_cost_per_character": 
1.25e-07, - "output_cost_per_token": 1.5e-06, - "output_cost_per_character": 3.75e-07, + "input_cost_per_token": 5e-7, + "input_cost_per_character": 1.25e-7, + "output_cost_per_token": 0.0000015, + "output_cost_per_character": 3.75e-7, "litellm_provider": "vertex_ai-language-models", "mode": "chat", "supports_function_calling": true, @@ -6370,10 +6969,10 @@ "max_output_tokens": 2048, "input_cost_per_image": 0.0025, "input_cost_per_video_per_second": 0.002, - "input_cost_per_token": 5e-07, - "input_cost_per_character": 1.25e-07, - "output_cost_per_token": 1.5e-06, - "output_cost_per_character": 3.75e-07, + "input_cost_per_token": 5e-7, + "input_cost_per_character": 1.25e-7, + "output_cost_per_token": 0.0000015, + "output_cost_per_character": 3.75e-7, "litellm_provider": "vertex_ai-language-models", "mode": "chat", "supports_function_calling": true, @@ -6387,10 +6986,10 @@ "max_output_tokens": 2048, "input_cost_per_image": 0.0025, "input_cost_per_video_per_second": 0.002, - "input_cost_per_token": 5e-07, - "input_cost_per_character": 1.25e-07, - "output_cost_per_token": 1.5e-06, - "output_cost_per_character": 3.75e-07, + "input_cost_per_token": 5e-7, + "input_cost_per_character": 1.25e-7, + "output_cost_per_token": 0.0000015, + "output_cost_per_character": 3.75e-7, "litellm_provider": "vertex_ai-language-models", "mode": "chat", "supports_function_calling": true, @@ -6404,10 +7003,10 @@ "max_output_tokens": 8192, "input_cost_per_image": 0.0025, "input_cost_per_video_per_second": 0.002, - "input_cost_per_token": 5e-07, - "input_cost_per_character": 1.25e-07, - "output_cost_per_token": 1.5e-06, - "output_cost_per_character": 3.75e-07, + "input_cost_per_token": 5e-7, + "input_cost_per_character": 1.25e-7, + "output_cost_per_token": 0.0000015, + "output_cost_per_character": 3.75e-7, "litellm_provider": "vertex_ai-language-models", "mode": "chat", "supports_function_calling": true, @@ -6421,19 +7020,19 @@ "max_input_tokens": 2097152, "max_output_tokens": 8192, 
"input_cost_per_image": 0.00032875, - "input_cost_per_audio_per_second": 3.125e-05, + "input_cost_per_audio_per_second": 0.00003125, "input_cost_per_video_per_second": 0.00032875, - "input_cost_per_token": 1.25e-06, - "input_cost_per_character": 3.125e-07, + "input_cost_per_token": 0.00000125, + "input_cost_per_character": 3.125e-7, "input_cost_per_image_above_128k_tokens": 0.0006575, "input_cost_per_video_per_second_above_128k_tokens": 0.0006575, - "input_cost_per_audio_per_second_above_128k_tokens": 6.25e-05, - "input_cost_per_token_above_128k_tokens": 2.5e-06, - "input_cost_per_character_above_128k_tokens": 6.25e-07, - "output_cost_per_token": 5e-06, - "output_cost_per_character": 1.25e-06, - "output_cost_per_token_above_128k_tokens": 1e-05, - "output_cost_per_character_above_128k_tokens": 2.5e-06, + "input_cost_per_audio_per_second_above_128k_tokens": 0.0000625, + "input_cost_per_token_above_128k_tokens": 0.0000025, + "input_cost_per_character_above_128k_tokens": 6.25e-7, + "output_cost_per_token": 0.000005, + "output_cost_per_character": 0.00000125, + "output_cost_per_token_above_128k_tokens": 0.00001, + "output_cost_per_character_above_128k_tokens": 0.0000025, "litellm_provider": "vertex_ai-language-models", "mode": "chat", "supports_vision": true, @@ -6450,19 +7049,19 @@ "max_input_tokens": 2097152, "max_output_tokens": 8192, "input_cost_per_image": 0.00032875, - "input_cost_per_audio_per_second": 3.125e-05, + "input_cost_per_audio_per_second": 0.00003125, "input_cost_per_video_per_second": 0.00032875, - "input_cost_per_token": 1.25e-06, - "input_cost_per_character": 3.125e-07, + "input_cost_per_token": 0.00000125, + "input_cost_per_character": 3.125e-7, "input_cost_per_image_above_128k_tokens": 0.0006575, "input_cost_per_video_per_second_above_128k_tokens": 0.0006575, - "input_cost_per_audio_per_second_above_128k_tokens": 6.25e-05, - "input_cost_per_token_above_128k_tokens": 2.5e-06, - "input_cost_per_character_above_128k_tokens": 6.25e-07, - 
"output_cost_per_token": 5e-06, - "output_cost_per_character": 1.25e-06, - "output_cost_per_token_above_128k_tokens": 1e-05, - "output_cost_per_character_above_128k_tokens": 2.5e-06, + "input_cost_per_audio_per_second_above_128k_tokens": 0.0000625, + "input_cost_per_token_above_128k_tokens": 0.0000025, + "input_cost_per_character_above_128k_tokens": 6.25e-7, + "output_cost_per_token": 0.000005, + "output_cost_per_character": 0.00000125, + "output_cost_per_token_above_128k_tokens": 0.00001, + "output_cost_per_character_above_128k_tokens": 0.0000025, "litellm_provider": "vertex_ai-language-models", "mode": "chat", "supports_vision": true, @@ -6479,19 +7078,19 @@ "max_input_tokens": 1000000, "max_output_tokens": 8192, "input_cost_per_image": 0.00032875, - "input_cost_per_audio_per_second": 3.125e-05, + "input_cost_per_audio_per_second": 0.00003125, "input_cost_per_video_per_second": 0.00032875, - "input_cost_per_token": 1.25e-06, - "input_cost_per_character": 3.125e-07, + "input_cost_per_token": 0.00000125, + "input_cost_per_character": 3.125e-7, "input_cost_per_image_above_128k_tokens": 0.0006575, "input_cost_per_video_per_second_above_128k_tokens": 0.0006575, - "input_cost_per_audio_per_second_above_128k_tokens": 6.25e-05, - "input_cost_per_token_above_128k_tokens": 2.5e-06, - "input_cost_per_character_above_128k_tokens": 6.25e-07, - "output_cost_per_token": 5e-06, - "output_cost_per_character": 1.25e-06, - "output_cost_per_token_above_128k_tokens": 1e-05, - "output_cost_per_character_above_128k_tokens": 2.5e-06, + "input_cost_per_audio_per_second_above_128k_tokens": 0.0000625, + "input_cost_per_token_above_128k_tokens": 0.0000025, + "input_cost_per_character_above_128k_tokens": 6.25e-7, + "output_cost_per_token": 0.000005, + "output_cost_per_character": 0.00000125, + "output_cost_per_token_above_128k_tokens": 0.00001, + "output_cost_per_character_above_128k_tokens": 0.0000025, "litellm_provider": "vertex_ai-language-models", "mode": "chat", "supports_vision": true, 
@@ -6508,19 +7107,19 @@ "max_input_tokens": 1000000, "max_output_tokens": 8192, "input_cost_per_image": 0.00032875, - "input_cost_per_audio_per_second": 3.125e-05, + "input_cost_per_audio_per_second": 0.00003125, "input_cost_per_video_per_second": 0.00032875, - "input_cost_per_token": 7.8125e-08, - "input_cost_per_character": 3.125e-07, + "input_cost_per_token": 7.8125e-8, + "input_cost_per_character": 3.125e-7, "input_cost_per_image_above_128k_tokens": 0.0006575, "input_cost_per_video_per_second_above_128k_tokens": 0.0006575, - "input_cost_per_audio_per_second_above_128k_tokens": 6.25e-05, - "input_cost_per_token_above_128k_tokens": 1.5625e-07, - "input_cost_per_character_above_128k_tokens": 6.25e-07, - "output_cost_per_token": 3.125e-07, - "output_cost_per_character": 1.25e-06, - "output_cost_per_token_above_128k_tokens": 6.25e-07, - "output_cost_per_character_above_128k_tokens": 2.5e-06, + "input_cost_per_audio_per_second_above_128k_tokens": 0.0000625, + "input_cost_per_token_above_128k_tokens": 1.5625e-7, + "input_cost_per_character_above_128k_tokens": 6.25e-7, + "output_cost_per_token": 3.125e-7, + "output_cost_per_character": 0.00000125, + "output_cost_per_token_above_128k_tokens": 6.25e-7, + "output_cost_per_character_above_128k_tokens": 0.0000025, "litellm_provider": "vertex_ai-language-models", "mode": "chat", "supports_system_messages": true, @@ -6535,19 +7134,19 @@ "max_input_tokens": 1000000, "max_output_tokens": 8192, "input_cost_per_image": 0.00032875, - "input_cost_per_audio_per_second": 3.125e-05, + "input_cost_per_audio_per_second": 0.00003125, "input_cost_per_video_per_second": 0.00032875, - "input_cost_per_token": 7.8125e-08, - "input_cost_per_character": 3.125e-07, + "input_cost_per_token": 7.8125e-8, + "input_cost_per_character": 3.125e-7, "input_cost_per_image_above_128k_tokens": 0.0006575, "input_cost_per_video_per_second_above_128k_tokens": 0.0006575, - "input_cost_per_audio_per_second_above_128k_tokens": 6.25e-05, - 
"input_cost_per_token_above_128k_tokens": 1.5625e-07, - "input_cost_per_character_above_128k_tokens": 6.25e-07, - "output_cost_per_token": 3.125e-07, - "output_cost_per_character": 1.25e-06, - "output_cost_per_token_above_128k_tokens": 6.25e-07, - "output_cost_per_character_above_128k_tokens": 2.5e-06, + "input_cost_per_audio_per_second_above_128k_tokens": 0.0000625, + "input_cost_per_token_above_128k_tokens": 1.5625e-7, + "input_cost_per_character_above_128k_tokens": 6.25e-7, + "output_cost_per_token": 3.125e-7, + "output_cost_per_character": 0.00000125, + "output_cost_per_token_above_128k_tokens": 6.25e-7, + "output_cost_per_character_above_128k_tokens": 0.0000025, "litellm_provider": "vertex_ai-language-models", "mode": "chat", "supports_system_messages": true, @@ -6562,19 +7161,19 @@ "max_input_tokens": 1000000, "max_output_tokens": 8192, "input_cost_per_image": 0.00032875, - "input_cost_per_audio_per_second": 3.125e-05, + "input_cost_per_audio_per_second": 0.00003125, "input_cost_per_video_per_second": 0.00032875, - "input_cost_per_token": 7.8125e-08, - "input_cost_per_character": 3.125e-07, + "input_cost_per_token": 7.8125e-8, + "input_cost_per_character": 3.125e-7, "input_cost_per_image_above_128k_tokens": 0.0006575, "input_cost_per_video_per_second_above_128k_tokens": 0.0006575, - "input_cost_per_audio_per_second_above_128k_tokens": 6.25e-05, - "input_cost_per_token_above_128k_tokens": 1.5625e-07, - "input_cost_per_character_above_128k_tokens": 6.25e-07, - "output_cost_per_token": 3.125e-07, - "output_cost_per_character": 1.25e-06, - "output_cost_per_token_above_128k_tokens": 6.25e-07, - "output_cost_per_character_above_128k_tokens": 2.5e-06, + "input_cost_per_audio_per_second_above_128k_tokens": 0.0000625, + "input_cost_per_token_above_128k_tokens": 1.5625e-7, + "input_cost_per_character_above_128k_tokens": 6.25e-7, + "output_cost_per_token": 3.125e-7, + "output_cost_per_character": 0.00000125, + "output_cost_per_token_above_128k_tokens": 6.25e-7, + 
"output_cost_per_character_above_128k_tokens": 0.0000025, "litellm_provider": "vertex_ai-language-models", "mode": "chat", "supports_function_calling": true, @@ -6593,20 +7192,20 @@ "max_audio_length_hours": 8.4, "max_audio_per_prompt": 1, "max_pdf_size_mb": 30, - "input_cost_per_image": 2e-05, - "input_cost_per_video_per_second": 2e-05, - "input_cost_per_audio_per_second": 2e-06, - "input_cost_per_token": 7.5e-08, - "input_cost_per_character": 1.875e-08, - "input_cost_per_token_above_128k_tokens": 1e-06, - "input_cost_per_character_above_128k_tokens": 2.5e-07, - "input_cost_per_image_above_128k_tokens": 4e-05, - "input_cost_per_video_per_second_above_128k_tokens": 4e-05, - "input_cost_per_audio_per_second_above_128k_tokens": 4e-06, - "output_cost_per_token": 3e-07, - "output_cost_per_character": 7.5e-08, - "output_cost_per_token_above_128k_tokens": 6e-07, - "output_cost_per_character_above_128k_tokens": 1.5e-07, + "input_cost_per_image": 0.00002, + "input_cost_per_video_per_second": 0.00002, + "input_cost_per_audio_per_second": 0.000002, + "input_cost_per_token": 7.5e-8, + "input_cost_per_character": 1.875e-8, + "input_cost_per_token_above_128k_tokens": 0.000001, + "input_cost_per_character_above_128k_tokens": 2.5e-7, + "input_cost_per_image_above_128k_tokens": 0.00004, + "input_cost_per_video_per_second_above_128k_tokens": 0.00004, + "input_cost_per_audio_per_second_above_128k_tokens": 0.000004, + "output_cost_per_token": 3e-7, + "output_cost_per_character": 7.5e-8, + "output_cost_per_token_above_128k_tokens": 6e-7, + "output_cost_per_character_above_128k_tokens": 1.5e-7, "litellm_provider": "vertex_ai-language-models", "mode": "chat", "supports_system_messages": true, @@ -6627,20 +7226,20 @@ "max_audio_length_hours": 8.4, "max_audio_per_prompt": 1, "max_pdf_size_mb": 30, - "input_cost_per_image": 2e-05, - "input_cost_per_video_per_second": 2e-05, - "input_cost_per_audio_per_second": 2e-06, - "input_cost_per_token": 4.688e-09, - "input_cost_per_character": 
1.875e-08, - "input_cost_per_token_above_128k_tokens": 1e-06, - "input_cost_per_character_above_128k_tokens": 2.5e-07, - "input_cost_per_image_above_128k_tokens": 4e-05, - "input_cost_per_video_per_second_above_128k_tokens": 4e-05, - "input_cost_per_audio_per_second_above_128k_tokens": 4e-06, - "output_cost_per_token": 4.6875e-09, - "output_cost_per_character": 1.875e-08, - "output_cost_per_token_above_128k_tokens": 9.375e-09, - "output_cost_per_character_above_128k_tokens": 3.75e-08, + "input_cost_per_image": 0.00002, + "input_cost_per_video_per_second": 0.00002, + "input_cost_per_audio_per_second": 0.000002, + "input_cost_per_token": 4.688e-9, + "input_cost_per_character": 1.875e-8, + "input_cost_per_token_above_128k_tokens": 0.000001, + "input_cost_per_character_above_128k_tokens": 2.5e-7, + "input_cost_per_image_above_128k_tokens": 0.00004, + "input_cost_per_video_per_second_above_128k_tokens": 0.00004, + "input_cost_per_audio_per_second_above_128k_tokens": 0.000004, + "output_cost_per_token": 4.6875e-9, + "output_cost_per_character": 1.875e-8, + "output_cost_per_token_above_128k_tokens": 9.375e-9, + "output_cost_per_character_above_128k_tokens": 3.75e-8, "litellm_provider": "vertex_ai-language-models", "mode": "chat", "supports_system_messages": true, @@ -6661,20 +7260,20 @@ "max_audio_length_hours": 8.4, "max_audio_per_prompt": 1, "max_pdf_size_mb": 30, - "input_cost_per_image": 2e-05, - "input_cost_per_video_per_second": 2e-05, - "input_cost_per_audio_per_second": 2e-06, - "input_cost_per_token": 7.5e-08, - "input_cost_per_character": 1.875e-08, - "input_cost_per_token_above_128k_tokens": 1e-06, - "input_cost_per_character_above_128k_tokens": 2.5e-07, - "input_cost_per_image_above_128k_tokens": 4e-05, - "input_cost_per_video_per_second_above_128k_tokens": 4e-05, - "input_cost_per_audio_per_second_above_128k_tokens": 4e-06, - "output_cost_per_token": 3e-07, - "output_cost_per_character": 7.5e-08, - "output_cost_per_token_above_128k_tokens": 6e-07, - 
"output_cost_per_character_above_128k_tokens": 1.5e-07, + "input_cost_per_image": 0.00002, + "input_cost_per_video_per_second": 0.00002, + "input_cost_per_audio_per_second": 0.000002, + "input_cost_per_token": 7.5e-8, + "input_cost_per_character": 1.875e-8, + "input_cost_per_token_above_128k_tokens": 0.000001, + "input_cost_per_character_above_128k_tokens": 2.5e-7, + "input_cost_per_image_above_128k_tokens": 0.00004, + "input_cost_per_video_per_second_above_128k_tokens": 0.00004, + "input_cost_per_audio_per_second_above_128k_tokens": 0.000004, + "output_cost_per_token": 3e-7, + "output_cost_per_character": 7.5e-8, + "output_cost_per_token_above_128k_tokens": 6e-7, + "output_cost_per_character_above_128k_tokens": 1.5e-7, "litellm_provider": "vertex_ai-language-models", "mode": "chat", "supports_system_messages": true, @@ -6696,20 +7295,20 @@ "max_audio_length_hours": 8.4, "max_audio_per_prompt": 1, "max_pdf_size_mb": 30, - "input_cost_per_image": 2e-05, - "input_cost_per_video_per_second": 2e-05, - "input_cost_per_audio_per_second": 2e-06, - "input_cost_per_token": 7.5e-08, - "input_cost_per_character": 1.875e-08, - "input_cost_per_token_above_128k_tokens": 1e-06, - "input_cost_per_character_above_128k_tokens": 2.5e-07, - "input_cost_per_image_above_128k_tokens": 4e-05, - "input_cost_per_video_per_second_above_128k_tokens": 4e-05, - "input_cost_per_audio_per_second_above_128k_tokens": 4e-06, - "output_cost_per_token": 3e-07, - "output_cost_per_character": 7.5e-08, - "output_cost_per_token_above_128k_tokens": 6e-07, - "output_cost_per_character_above_128k_tokens": 1.5e-07, + "input_cost_per_image": 0.00002, + "input_cost_per_video_per_second": 0.00002, + "input_cost_per_audio_per_second": 0.000002, + "input_cost_per_token": 7.5e-8, + "input_cost_per_character": 1.875e-8, + "input_cost_per_token_above_128k_tokens": 0.000001, + "input_cost_per_character_above_128k_tokens": 2.5e-7, + "input_cost_per_image_above_128k_tokens": 0.00004, + 
"input_cost_per_video_per_second_above_128k_tokens": 0.00004, + "input_cost_per_audio_per_second_above_128k_tokens": 0.000004, + "output_cost_per_token": 3e-7, + "output_cost_per_character": 7.5e-8, + "output_cost_per_token_above_128k_tokens": 6e-7, + "output_cost_per_character_above_128k_tokens": 1.5e-7, "litellm_provider": "vertex_ai-language-models", "mode": "chat", "supports_system_messages": true, @@ -6731,20 +7330,20 @@ "max_audio_length_hours": 8.4, "max_audio_per_prompt": 1, "max_pdf_size_mb": 30, - "input_cost_per_image": 2e-05, - "input_cost_per_video_per_second": 2e-05, - "input_cost_per_audio_per_second": 2e-06, - "input_cost_per_token": 7.5e-08, - "input_cost_per_character": 1.875e-08, - "input_cost_per_token_above_128k_tokens": 1e-06, - "input_cost_per_character_above_128k_tokens": 2.5e-07, - "input_cost_per_image_above_128k_tokens": 4e-05, - "input_cost_per_video_per_second_above_128k_tokens": 4e-05, - "input_cost_per_audio_per_second_above_128k_tokens": 4e-06, - "output_cost_per_token": 4.6875e-09, - "output_cost_per_character": 1.875e-08, - "output_cost_per_token_above_128k_tokens": 9.375e-09, - "output_cost_per_character_above_128k_tokens": 3.75e-08, + "input_cost_per_image": 0.00002, + "input_cost_per_video_per_second": 0.00002, + "input_cost_per_audio_per_second": 0.000002, + "input_cost_per_token": 7.5e-8, + "input_cost_per_character": 1.875e-8, + "input_cost_per_token_above_128k_tokens": 0.000001, + "input_cost_per_character_above_128k_tokens": 2.5e-7, + "input_cost_per_image_above_128k_tokens": 0.00004, + "input_cost_per_video_per_second_above_128k_tokens": 0.00004, + "input_cost_per_audio_per_second_above_128k_tokens": 0.000004, + "output_cost_per_token": 4.6875e-9, + "output_cost_per_character": 1.875e-8, + "output_cost_per_token_above_128k_tokens": 9.375e-9, + "output_cost_per_character_above_128k_tokens": 3.75e-8, "litellm_provider": "vertex_ai-language-models", "mode": "chat", "supports_system_messages": true, @@ -6791,8 +7390,8 @@ 
"max_images_per_prompt": 16, "max_videos_per_prompt": 1, "max_video_length": 2, - "input_cost_per_token": 5e-07, - "output_cost_per_token": 1.5e-06, + "input_cost_per_token": 5e-7, + "output_cost_per_token": 0.0000015, "input_cost_per_image": 0.0025, "litellm_provider": "vertex_ai-vision-models", "mode": "chat", @@ -6809,8 +7408,8 @@ "max_images_per_prompt": 16, "max_videos_per_prompt": 1, "max_video_length": 2, - "input_cost_per_token": 5e-07, - "output_cost_per_token": 1.5e-06, + "input_cost_per_token": 5e-7, + "output_cost_per_token": 0.0000015, "input_cost_per_image": 0.0025, "litellm_provider": "vertex_ai-vision-models", "mode": "chat", @@ -6827,8 +7426,8 @@ "max_images_per_prompt": 16, "max_videos_per_prompt": 1, "max_video_length": 2, - "input_cost_per_token": 5e-07, - "output_cost_per_token": 1.5e-06, + "input_cost_per_token": 5e-7, + "output_cost_per_token": 0.0000015, "input_cost_per_image": 0.0025, "litellm_provider": "vertex_ai-vision-models", "mode": "chat", @@ -6843,8 +7442,8 @@ "max_tokens": 8192, "max_input_tokens": 32768, "max_output_tokens": 8192, - "input_cost_per_character": 5e-07, - "output_cost_per_character": 1e-06, + "input_cost_per_character": 5e-7, + "output_cost_per_character": 0.000001, "litellm_provider": "vertex_ai-language-models", "mode": "chat", "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models", @@ -6854,8 +7453,8 @@ "max_tokens": 1024, "max_input_tokens": 8192, "max_output_tokens": 1024, - "input_cost_per_character": 5e-06, - "output_cost_per_character": 1.5e-05, + "input_cost_per_character": 0.000005, + "output_cost_per_character": 0.000015, "litellm_provider": "vertex_ai-language-models", "mode": "chat", "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models", @@ -6871,10 +7470,10 @@ "max_audio_length_hours": 8.4, "max_audio_per_prompt": 1, "max_pdf_size_mb": 30, - "input_cost_per_token": 1.25e-06, - "input_cost_per_token_above_200k_tokens": 
2.5e-06, - "output_cost_per_token": 1e-05, - "output_cost_per_token_above_200k_tokens": 1.5e-05, + "input_cost_per_token": 0.00000125, + "input_cost_per_token_above_200k_tokens": 0.0000025, + "output_cost_per_token": 0.00001, + "output_cost_per_token_above_200k_tokens": 0.000015, "litellm_provider": "vertex_ai-language-models", "mode": "chat", "supports_system_messages": true, @@ -6901,7 +7500,7 @@ "source": "https://cloud.google.com/vertex-ai/generative-ai/pricing", "supports_parallel_function_calling": true, "supports_web_search": true, - "cache_read_input_token_cost": 3.125e-07, + "cache_read_input_token_cost": 3.125e-7, "supports_prompt_caching": true }, "gemini-2.0-pro-exp-02-05": { @@ -6914,10 +7513,10 @@ "max_audio_length_hours": 8.4, "max_audio_per_prompt": 1, "max_pdf_size_mb": 30, - "input_cost_per_token": 1.25e-06, - "input_cost_per_token_above_200k_tokens": 2.5e-06, - "output_cost_per_token": 1e-05, - "output_cost_per_token_above_200k_tokens": 1.5e-05, + "input_cost_per_token": 0.00000125, + "input_cost_per_token_above_200k_tokens": 0.0000025, + "output_cost_per_token": 0.00001, + "output_cost_per_token_above_200k_tokens": 0.000015, "litellm_provider": "vertex_ai-language-models", "mode": "chat", "supports_system_messages": true, @@ -6944,7 +7543,7 @@ "source": "https://cloud.google.com/vertex-ai/generative-ai/pricing", "supports_parallel_function_calling": true, "supports_web_search": true, - "cache_read_input_token_cost": 3.125e-07, + "cache_read_input_token_cost": 3.125e-7, "supports_prompt_caching": true }, "gemini-2.0-flash-exp": { @@ -6960,14 +7559,14 @@ "input_cost_per_image": 0, "input_cost_per_video_per_second": 0, "input_cost_per_audio_per_second": 0, - "input_cost_per_token": 1.5e-07, + "input_cost_per_token": 1.5e-7, "input_cost_per_character": 0, "input_cost_per_token_above_128k_tokens": 0, "input_cost_per_character_above_128k_tokens": 0, "input_cost_per_image_above_128k_tokens": 0, "input_cost_per_video_per_second_above_128k_tokens": 0, 
"input_cost_per_audio_per_second_above_128k_tokens": 0, - "output_cost_per_token": 6e-07, + "output_cost_per_token": 6e-7, "output_cost_per_character": 0, "output_cost_per_token_above_128k_tokens": 0, "output_cost_per_character_above_128k_tokens": 0, @@ -6992,7 +7591,7 @@ "supports_tool_choice": true, "supports_parallel_function_calling": true, "supports_web_search": true, - "cache_read_input_token_cost": 3.75e-08, + "cache_read_input_token_cost": 3.75e-8, "supports_prompt_caching": true }, "gemini-2.0-flash-001": { @@ -7005,9 +7604,9 @@ "max_audio_length_hours": 8.4, "max_audio_per_prompt": 1, "max_pdf_size_mb": 30, - "input_cost_per_audio_token": 1e-06, - "input_cost_per_token": 1.5e-07, - "output_cost_per_token": 6e-07, + "input_cost_per_audio_token": 0.000001, + "input_cost_per_token": 1.5e-7, + "output_cost_per_token": 6e-7, "litellm_provider": "vertex_ai-language-models", "mode": "chat", "supports_system_messages": true, @@ -7030,7 +7629,7 @@ "deprecation_date": "2026-02-05", "supports_parallel_function_calling": true, "supports_web_search": true, - "cache_read_input_token_cost": 3.75e-08, + "cache_read_input_token_cost": 3.75e-8, "supports_prompt_caching": true }, "gemini-2.0-flash-thinking-exp": { @@ -7078,7 +7677,7 @@ "supports_tool_choice": true, "supports_parallel_function_calling": true, "supports_web_search": true, - "cache_read_input_token_cost": 0.0, + "cache_read_input_token_cost": 0, "supports_prompt_caching": true }, "gemini-2.0-flash-thinking-exp-01-21": { @@ -7126,7 +7725,7 @@ "supports_tool_choice": true, "supports_parallel_function_calling": true, "supports_web_search": true, - "cache_read_input_token_cost": 0.0, + "cache_read_input_token_cost": 0, "supports_prompt_caching": true }, "gemini-2.5-pro": { @@ -7139,10 +7738,10 @@ "max_audio_length_hours": 8.4, "max_audio_per_prompt": 1, "max_pdf_size_mb": 30, - "input_cost_per_token": 1.25e-06, - "input_cost_per_token_above_200k_tokens": 2.5e-06, - "output_cost_per_token": 1e-05, - 
"output_cost_per_token_above_200k_tokens": 1.5e-05, + "input_cost_per_token": 0.00000125, + "input_cost_per_token_above_200k_tokens": 0.0000025, + "output_cost_per_token": 0.00001, + "output_cost_per_token_above_200k_tokens": 0.000015, "litellm_provider": "vertex_ai-language-models", "mode": "chat", "supports_system_messages": true, @@ -7169,7 +7768,7 @@ ], "source": "https://cloud.google.com/vertex-ai/generative-ai/pricing", "supports_web_search": true, - "cache_read_input_token_cost": 3.125e-07, + "cache_read_input_token_cost": 3.125e-7, "supports_prompt_caching": true }, "gemini/gemini-2.5-pro-exp-03-25": { @@ -7182,10 +7781,10 @@ "max_audio_length_hours": 8.4, "max_audio_per_prompt": 1, "max_pdf_size_mb": 30, - "input_cost_per_token": 0.0, - "input_cost_per_token_above_200k_tokens": 0.0, - "output_cost_per_token": 0.0, - "output_cost_per_token_above_200k_tokens": 0.0, + "input_cost_per_token": 0, + "input_cost_per_token_above_200k_tokens": 0, + "output_cost_per_token": 0, + "output_cost_per_token_above_200k_tokens": 0, "litellm_provider": "gemini", "mode": "chat", "rpm": 5, @@ -7213,7 +7812,7 @@ ], "source": "https://cloud.google.com/vertex-ai/generative-ai/pricing", "supports_web_search": true, - "cache_read_input_token_cost": 0.0, + "cache_read_input_token_cost": 0, "supports_prompt_caching": true }, "gemini/gemini-2.5-pro": { @@ -7226,10 +7825,10 @@ "max_audio_length_hours": 8.4, "max_audio_per_prompt": 1, "max_pdf_size_mb": 30, - "input_cost_per_token": 1.25e-06, - "input_cost_per_token_above_200k_tokens": 2.5e-06, - "output_cost_per_token": 1e-05, - "output_cost_per_token_above_200k_tokens": 1.5e-05, + "input_cost_per_token": 0.00000125, + "input_cost_per_token_above_200k_tokens": 0.0000025, + "output_cost_per_token": 0.00001, + "output_cost_per_token_above_200k_tokens": 0.000015, "litellm_provider": "gemini", "mode": "chat", "rpm": 2000, @@ -7258,7 +7857,7 @@ ], "source": "https://cloud.google.com/vertex-ai/generative-ai/pricing", "supports_web_search": 
true, - "cache_read_input_token_cost": 3.125e-07, + "cache_read_input_token_cost": 3.125e-7, "supports_prompt_caching": true }, "gemini/gemini-2.5-flash": { @@ -7271,10 +7870,10 @@ "max_audio_length_hours": 8.4, "max_audio_per_prompt": 1, "max_pdf_size_mb": 30, - "input_cost_per_audio_token": 1e-06, - "input_cost_per_token": 3e-07, - "output_cost_per_token": 2.5e-06, - "output_cost_per_reasoning_token": 2.5e-06, + "input_cost_per_audio_token": 0.000001, + "input_cost_per_token": 3e-7, + "output_cost_per_token": 0.0000025, + "output_cost_per_reasoning_token": 0.0000025, "litellm_provider": "gemini", "mode": "chat", "supports_reasoning": true, @@ -7305,7 +7904,7 @@ "tpm": 8000000, "rpm": 100000, "supports_pdf_input": true, - "cache_read_input_token_cost": 7.5e-08, + "cache_read_input_token_cost": 7.5e-8, "supports_prompt_caching": true }, "gemini-2.5-flash": { @@ -7318,10 +7917,10 @@ "max_audio_length_hours": 8.4, "max_audio_per_prompt": 1, "max_pdf_size_mb": 30, - "input_cost_per_audio_token": 1e-06, - "input_cost_per_token": 3e-07, - "output_cost_per_token": 2.5e-06, - "output_cost_per_reasoning_token": 2.5e-06, + "input_cost_per_audio_token": 0.000001, + "input_cost_per_token": 3e-7, + "output_cost_per_token": 0.0000025, + "output_cost_per_reasoning_token": 0.0000025, "litellm_provider": "vertex_ai-language-models", "mode": "chat", "supports_reasoning": true, @@ -7350,7 +7949,7 @@ "supports_web_search": true, "supports_url_context": true, "supports_pdf_input": true, - "cache_read_input_token_cost": 7.5e-08, + "cache_read_input_token_cost": 7.5e-8, "supports_prompt_caching": true }, "gemini/gemini-2.0-flash-live-001": { @@ -7363,12 +7962,12 @@ "max_audio_length_hours": 8.4, "max_audio_per_prompt": 1, "max_pdf_size_mb": 30, - "input_cost_per_token": 3.5e-07, - "input_cost_per_audio_token": 2.1e-06, - "input_cost_per_image": 2.1e-06, - "input_cost_per_video_per_second": 2.1e-06, - "output_cost_per_token": 1.5e-06, - "output_cost_per_audio_token": 8.5e-06, + 
"input_cost_per_token": 3.5e-7, + "input_cost_per_audio_token": 0.0000021, + "input_cost_per_image": 0.0000021, + "input_cost_per_video_per_second": 0.0000021, + "output_cost_per_token": 0.0000015, + "output_cost_per_audio_token": 0.0000085, "litellm_provider": "gemini", "mode": "chat", "rpm": 10, @@ -7398,7 +7997,7 @@ "supports_web_search": true, "supports_url_context": true, "supports_pdf_input": true, - "cache_read_input_token_cost": 7.5e-08, + "cache_read_input_token_cost": 7.5e-8, "supports_prompt_caching": true }, "gemini/gemini-2.5-flash-preview-tts": { @@ -7411,10 +8010,10 @@ "max_audio_length_hours": 8.4, "max_audio_per_prompt": 1, "max_pdf_size_mb": 30, - "input_cost_per_audio_token": 1e-06, - "input_cost_per_token": 1.5e-07, - "output_cost_per_token": 6e-07, - "output_cost_per_reasoning_token": 3.5e-06, + "input_cost_per_audio_token": 0.000001, + "input_cost_per_token": 1.5e-7, + "output_cost_per_token": 6e-7, + "output_cost_per_reasoning_token": 0.0000035, "litellm_provider": "gemini", "mode": "chat", "rpm": 10, @@ -7438,7 +8037,7 @@ ], "source": "https://ai.google.dev/gemini-api/docs/models#gemini-2.5-flash-preview", "supports_web_search": true, - "cache_read_input_token_cost": 3.75e-08, + "cache_read_input_token_cost": 3.75e-8, "supports_prompt_caching": true }, "gemini/gemini-2.5-flash-preview-05-20": { @@ -7451,10 +8050,10 @@ "max_audio_length_hours": 8.4, "max_audio_per_prompt": 1, "max_pdf_size_mb": 30, - "input_cost_per_audio_token": 1e-06, - "input_cost_per_token": 3e-07, - "output_cost_per_token": 2.5e-06, - "output_cost_per_reasoning_token": 2.5e-06, + "input_cost_per_audio_token": 0.000001, + "input_cost_per_token": 3e-7, + "output_cost_per_token": 0.0000025, + "output_cost_per_reasoning_token": 0.0000025, "litellm_provider": "gemini", "mode": "chat", "rpm": 10, @@ -7483,7 +8082,7 @@ "supports_web_search": true, "supports_url_context": true, "supports_pdf_input": true, - "cache_read_input_token_cost": 7.5e-08, + "cache_read_input_token_cost": 
7.5e-8, "supports_prompt_caching": true }, "gemini/gemini-2.5-flash-preview-04-17": { @@ -7496,10 +8095,10 @@ "max_audio_length_hours": 8.4, "max_audio_per_prompt": 1, "max_pdf_size_mb": 30, - "input_cost_per_audio_token": 1e-06, - "input_cost_per_token": 1.5e-07, - "output_cost_per_token": 6e-07, - "output_cost_per_reasoning_token": 3.5e-06, + "input_cost_per_audio_token": 0.000001, + "input_cost_per_token": 1.5e-7, + "output_cost_per_token": 6e-7, + "output_cost_per_reasoning_token": 0.0000035, "litellm_provider": "gemini", "mode": "chat", "rpm": 10, @@ -7527,7 +8126,7 @@ "source": "https://ai.google.dev/gemini-api/docs/models#gemini-2.5-flash-preview", "supports_web_search": true, "supports_pdf_input": true, - "cache_read_input_token_cost": 3.75e-08, + "cache_read_input_token_cost": 3.75e-8, "supports_prompt_caching": true }, "gemini/gemini-2.5-flash-lite-preview-06-17": { @@ -7540,10 +8139,10 @@ "max_audio_length_hours": 8.4, "max_audio_per_prompt": 1, "max_pdf_size_mb": 30, - "input_cost_per_audio_token": 5e-07, - "input_cost_per_token": 1e-07, - "output_cost_per_token": 4e-07, - "output_cost_per_reasoning_token": 4e-07, + "input_cost_per_audio_token": 5e-7, + "input_cost_per_token": 1e-7, + "output_cost_per_token": 4e-7, + "output_cost_per_reasoning_token": 4e-7, "litellm_provider": "gemini", "mode": "chat", "rpm": 15, @@ -7574,7 +8173,7 @@ "supports_web_search": true, "supports_url_context": true, "supports_pdf_input": true, - "cache_read_input_token_cost": 2.5e-08, + "cache_read_input_token_cost": 2.5e-8, "supports_prompt_caching": true }, "gemini/gemini-2.5-flash-lite": { @@ -7587,10 +8186,10 @@ "max_audio_length_hours": 8.4, "max_audio_per_prompt": 1, "max_pdf_size_mb": 30, - "input_cost_per_audio_token": 5e-07, - "input_cost_per_token": 1e-07, - "output_cost_per_token": 4e-07, - "output_cost_per_reasoning_token": 4e-07, + "input_cost_per_audio_token": 5e-7, + "input_cost_per_token": 1e-7, + "output_cost_per_token": 4e-7, + 
"output_cost_per_reasoning_token": 4e-7, "litellm_provider": "gemini", "mode": "chat", "rpm": 15, @@ -7621,7 +8220,7 @@ "supports_web_search": true, "supports_url_context": true, "supports_pdf_input": true, - "cache_read_input_token_cost": 2.5e-08, + "cache_read_input_token_cost": 2.5e-8, "supports_prompt_caching": true }, "gemini-2.5-flash-preview-05-20": { @@ -7634,10 +8233,10 @@ "max_audio_length_hours": 8.4, "max_audio_per_prompt": 1, "max_pdf_size_mb": 30, - "input_cost_per_audio_token": 1e-06, - "input_cost_per_token": 3e-07, - "output_cost_per_token": 2.5e-06, - "output_cost_per_reasoning_token": 2.5e-06, + "input_cost_per_audio_token": 0.000001, + "input_cost_per_token": 3e-7, + "output_cost_per_token": 0.0000025, + "output_cost_per_reasoning_token": 0.0000025, "litellm_provider": "vertex_ai-language-models", "mode": "chat", "supports_reasoning": true, @@ -7666,7 +8265,7 @@ "supports_web_search": true, "supports_url_context": true, "supports_pdf_input": true, - "cache_read_input_token_cost": 7.5e-08, + "cache_read_input_token_cost": 7.5e-8, "supports_prompt_caching": true }, "gemini-2.5-flash-preview-04-17": { @@ -7679,10 +8278,10 @@ "max_audio_length_hours": 8.4, "max_audio_per_prompt": 1, "max_pdf_size_mb": 30, - "input_cost_per_audio_token": 1e-06, - "input_cost_per_token": 1.5e-07, - "output_cost_per_token": 6e-07, - "output_cost_per_reasoning_token": 3.5e-06, + "input_cost_per_audio_token": 0.000001, + "input_cost_per_token": 1.5e-7, + "output_cost_per_token": 6e-7, + "output_cost_per_reasoning_token": 0.0000035, "litellm_provider": "vertex_ai-language-models", "mode": "chat", "supports_reasoning": true, @@ -7710,7 +8309,7 @@ "supports_parallel_function_calling": true, "supports_web_search": true, "supports_pdf_input": true, - "cache_read_input_token_cost": 3.75e-08, + "cache_read_input_token_cost": 3.75e-8, "supports_prompt_caching": true }, "gemini-2.5-flash-lite-preview-06-17": { @@ -7723,10 +8322,10 @@ "max_audio_length_hours": 8.4, 
"max_audio_per_prompt": 1, "max_pdf_size_mb": 30, - "input_cost_per_audio_token": 5e-07, - "input_cost_per_token": 1e-07, - "output_cost_per_token": 4e-07, - "output_cost_per_reasoning_token": 4e-07, + "input_cost_per_audio_token": 5e-7, + "input_cost_per_token": 1e-7, + "output_cost_per_token": 4e-7, + "output_cost_per_reasoning_token": 4e-7, "litellm_provider": "vertex_ai-language-models", "mode": "chat", "supports_reasoning": true, @@ -7755,7 +8354,7 @@ "supports_web_search": true, "supports_url_context": true, "supports_pdf_input": true, - "cache_read_input_token_cost": 2.5e-08, + "cache_read_input_token_cost": 2.5e-8, "supports_prompt_caching": true }, "gemini-2.5-flash-lite": { @@ -7768,10 +8367,10 @@ "max_audio_length_hours": 8.4, "max_audio_per_prompt": 1, "max_pdf_size_mb": 30, - "input_cost_per_audio_token": 5e-07, - "input_cost_per_token": 1e-07, - "output_cost_per_token": 4e-07, - "output_cost_per_reasoning_token": 4e-07, + "input_cost_per_audio_token": 5e-7, + "input_cost_per_token": 1e-7, + "output_cost_per_token": 4e-7, + "output_cost_per_reasoning_token": 4e-7, "litellm_provider": "vertex_ai-language-models", "mode": "chat", "supports_reasoning": true, @@ -7800,7 +8399,7 @@ "supports_web_search": true, "supports_url_context": true, "supports_pdf_input": true, - "cache_read_input_token_cost": 2.5e-08, + "cache_read_input_token_cost": 2.5e-8, "supports_prompt_caching": true }, "gemini-2.0-flash": { @@ -7813,9 +8412,9 @@ "max_audio_length_hours": 8.4, "max_audio_per_prompt": 1, "max_pdf_size_mb": 30, - "input_cost_per_audio_token": 7e-07, - "input_cost_per_token": 1e-07, - "output_cost_per_token": 4e-07, + "input_cost_per_audio_token": 7e-7, + "input_cost_per_token": 1e-7, + "output_cost_per_token": 4e-7, "litellm_provider": "vertex_ai-language-models", "mode": "chat", "supports_system_messages": true, @@ -7839,7 +8438,7 @@ "supports_parallel_function_calling": true, "supports_web_search": true, "supports_url_context": true, - 
"cache_read_input_token_cost": 2.5e-08, + "cache_read_input_token_cost": 2.5e-8, "supports_prompt_caching": true }, "gemini-2.0-flash-lite": { @@ -7851,9 +8450,9 @@ "max_audio_length_hours": 8.4, "max_audio_per_prompt": 1, "max_pdf_size_mb": 50, - "input_cost_per_audio_token": 7.5e-08, - "input_cost_per_token": 7.5e-08, - "output_cost_per_token": 3e-07, + "input_cost_per_audio_token": 7.5e-8, + "input_cost_per_token": 7.5e-8, + "output_cost_per_token": 3e-7, "litellm_provider": "vertex_ai-language-models", "mode": "chat", "supports_system_messages": true, @@ -7874,7 +8473,7 @@ "supports_tool_choice": true, "supports_parallel_function_calling": true, "supports_web_search": true, - "cache_read_input_token_cost": 1.875e-08, + "cache_read_input_token_cost": 1.875e-8, "supports_prompt_caching": true }, "gemini-2.0-flash-lite-001": { @@ -7886,9 +8485,9 @@ "max_audio_length_hours": 8.4, "max_audio_per_prompt": 1, "max_pdf_size_mb": 50, - "input_cost_per_audio_token": 7.5e-08, - "input_cost_per_token": 7.5e-08, - "output_cost_per_token": 3e-07, + "input_cost_per_audio_token": 7.5e-8, + "input_cost_per_token": 7.5e-8, + "output_cost_per_token": 3e-7, "litellm_provider": "vertex_ai-language-models", "mode": "chat", "supports_system_messages": true, @@ -7910,7 +8509,7 @@ "deprecation_date": "2026-02-25", "supports_parallel_function_calling": true, "supports_web_search": true, - "cache_read_input_token_cost": 1.875e-08, + "cache_read_input_token_cost": 1.875e-8, "supports_prompt_caching": true }, "gemini-2.5-pro-preview-06-05": { @@ -7923,11 +8522,11 @@ "max_audio_length_hours": 8.4, "max_audio_per_prompt": 1, "max_pdf_size_mb": 30, - "input_cost_per_audio_token": 1.25e-06, - "input_cost_per_token": 1.25e-06, - "input_cost_per_token_above_200k_tokens": 2.5e-06, - "output_cost_per_token": 1e-05, - "output_cost_per_token_above_200k_tokens": 1.5e-05, + "input_cost_per_audio_token": 0.00000125, + "input_cost_per_token": 0.00000125, + "input_cost_per_token_above_200k_tokens": 
0.0000025, + "output_cost_per_token": 0.00001, + "output_cost_per_token_above_200k_tokens": 0.000015, "litellm_provider": "vertex_ai-language-models", "mode": "chat", "supports_reasoning": true, @@ -7955,7 +8554,7 @@ "supports_parallel_function_calling": true, "supports_web_search": true, "supports_pdf_input": true, - "cache_read_input_token_cost": 3.125e-07, + "cache_read_input_token_cost": 3.125e-7, "supports_prompt_caching": true }, "gemini-2.5-pro-preview-05-06": { @@ -7968,11 +8567,11 @@ "max_audio_length_hours": 8.4, "max_audio_per_prompt": 1, "max_pdf_size_mb": 30, - "input_cost_per_audio_token": 1.25e-06, - "input_cost_per_token": 1.25e-06, - "input_cost_per_token_above_200k_tokens": 2.5e-06, - "output_cost_per_token": 1e-05, - "output_cost_per_token_above_200k_tokens": 1.5e-05, + "input_cost_per_audio_token": 0.00000125, + "input_cost_per_token": 0.00000125, + "input_cost_per_token_above_200k_tokens": 0.0000025, + "output_cost_per_token": 0.00001, + "output_cost_per_token_above_200k_tokens": 0.000015, "litellm_provider": "vertex_ai-language-models", "mode": "chat", "supports_reasoning": true, @@ -8003,7 +8602,7 @@ "supports_parallel_function_calling": true, "supports_web_search": true, "supports_pdf_input": true, - "cache_read_input_token_cost": 3.125e-07, + "cache_read_input_token_cost": 3.125e-7, "supports_prompt_caching": true }, "gemini-2.5-pro-preview-03-25": { @@ -8016,11 +8615,11 @@ "max_audio_length_hours": 8.4, "max_audio_per_prompt": 1, "max_pdf_size_mb": 30, - "input_cost_per_audio_token": 1.25e-06, - "input_cost_per_token": 1.25e-06, - "input_cost_per_token_above_200k_tokens": 2.5e-06, - "output_cost_per_token": 1e-05, - "output_cost_per_token_above_200k_tokens": 1.5e-05, + "input_cost_per_audio_token": 0.00000125, + "input_cost_per_token": 0.00000125, + "input_cost_per_token_above_200k_tokens": 0.0000025, + "output_cost_per_token": 0.00001, + "output_cost_per_token_above_200k_tokens": 0.000015, "litellm_provider": "vertex_ai-language-models", 
"mode": "chat", "supports_reasoning": true, @@ -8048,7 +8647,7 @@ "supports_parallel_function_calling": true, "supports_web_search": true, "supports_pdf_input": true, - "cache_read_input_token_cost": 3.125e-07, + "cache_read_input_token_cost": 3.125e-7, "supports_prompt_caching": true }, "gemini-2.0-flash-preview-image-generation": { @@ -8061,9 +8660,9 @@ "max_audio_length_hours": 8.4, "max_audio_per_prompt": 1, "max_pdf_size_mb": 30, - "input_cost_per_audio_token": 7e-07, - "input_cost_per_token": 1e-07, - "output_cost_per_token": 4e-07, + "input_cost_per_audio_token": 7e-7, + "input_cost_per_token": 1e-7, + "output_cost_per_token": 4e-7, "litellm_provider": "vertex_ai-language-models", "mode": "chat", "supports_system_messages": true, @@ -8086,7 +8685,7 @@ "source": "https://ai.google.dev/pricing#2_0flash", "supports_parallel_function_calling": true, "supports_web_search": true, - "cache_read_input_token_cost": 2.5e-08, + "cache_read_input_token_cost": 2.5e-8, "supports_prompt_caching": true }, "gemini-2.5-pro-preview-tts": { @@ -8099,11 +8698,11 @@ "max_audio_length_hours": 8.4, "max_audio_per_prompt": 1, "max_pdf_size_mb": 30, - "input_cost_per_audio_token": 7e-07, - "input_cost_per_token": 1.25e-06, - "input_cost_per_token_above_200k_tokens": 2.5e-06, - "output_cost_per_token": 1e-05, - "output_cost_per_token_above_200k_tokens": 1.5e-05, + "input_cost_per_audio_token": 7e-7, + "input_cost_per_token": 0.00000125, + "input_cost_per_token_above_200k_tokens": 0.0000025, + "output_cost_per_token": 0.00001, + "output_cost_per_token_above_200k_tokens": 0.000015, "litellm_provider": "vertex_ai-language-models", "mode": "chat", "supports_system_messages": true, @@ -8121,7 +8720,7 @@ "source": "https://ai.google.dev/gemini-api/docs/pricing#gemini-2.5-pro-preview", "supports_parallel_function_calling": true, "supports_web_search": true, - "cache_read_input_token_cost": 3.125e-07, + "cache_read_input_token_cost": 3.125e-7, "supports_prompt_caching": true }, 
"gemini/gemini-2.0-pro-exp-02-05": { @@ -8162,7 +8761,7 @@ "supports_tool_choice": true, "source": "https://cloud.google.com/vertex-ai/generative-ai/pricing", "supports_web_search": true, - "cache_read_input_token_cost": 0.0, + "cache_read_input_token_cost": 0, "supports_prompt_caching": true }, "gemini/gemini-2.0-flash-preview-image-generation": { @@ -8175,9 +8774,9 @@ "max_audio_length_hours": 8.4, "max_audio_per_prompt": 1, "max_pdf_size_mb": 30, - "input_cost_per_audio_token": 7e-07, - "input_cost_per_token": 1e-07, - "output_cost_per_token": 4e-07, + "input_cost_per_audio_token": 7e-7, + "input_cost_per_token": 1e-7, + "output_cost_per_token": 4e-7, "litellm_provider": "gemini", "mode": "chat", "rpm": 10000, @@ -8201,7 +8800,7 @@ "supports_tool_choice": true, "source": "https://ai.google.dev/pricing#2_0flash", "supports_web_search": true, - "cache_read_input_token_cost": 2.5e-08, + "cache_read_input_token_cost": 2.5e-8, "supports_prompt_caching": true }, "gemini/gemini-2.0-flash": { @@ -8214,9 +8813,9 @@ "max_audio_length_hours": 8.4, "max_audio_per_prompt": 1, "max_pdf_size_mb": 30, - "input_cost_per_audio_token": 7e-07, - "input_cost_per_token": 1e-07, - "output_cost_per_token": 4e-07, + "input_cost_per_audio_token": 7e-7, + "input_cost_per_token": 1e-7, + "output_cost_per_token": 4e-7, "litellm_provider": "gemini", "mode": "chat", "rpm": 10000, @@ -8241,7 +8840,7 @@ "source": "https://ai.google.dev/pricing#2_0flash", "supports_web_search": true, "supports_url_context": true, - "cache_read_input_token_cost": 2.5e-08, + "cache_read_input_token_cost": 2.5e-8, "supports_prompt_caching": true }, "gemini/gemini-2.0-flash-lite": { @@ -8253,9 +8852,9 @@ "max_audio_length_hours": 8.4, "max_audio_per_prompt": 1, "max_pdf_size_mb": 50, - "input_cost_per_audio_token": 7.5e-08, - "input_cost_per_token": 7.5e-08, - "output_cost_per_token": 3e-07, + "input_cost_per_audio_token": 7.5e-8, + "input_cost_per_token": 7.5e-8, + "output_cost_per_token": 3e-7, "litellm_provider": 
"gemini", "mode": "chat", "tpm": 4000000, @@ -8277,7 +8876,7 @@ ], "source": "https://ai.google.dev/gemini-api/docs/pricing#gemini-2.0-flash-lite", "supports_web_search": true, - "cache_read_input_token_cost": 1.875e-08, + "cache_read_input_token_cost": 1.875e-8, "supports_prompt_caching": true }, "gemini/gemini-2.0-flash-001": { @@ -8290,9 +8889,9 @@ "max_audio_length_hours": 8.4, "max_audio_per_prompt": 1, "max_pdf_size_mb": 30, - "input_cost_per_audio_token": 7e-07, - "input_cost_per_token": 1e-07, - "output_cost_per_token": 4e-07, + "input_cost_per_audio_token": 7e-7, + "input_cost_per_token": 1e-7, + "output_cost_per_token": 4e-7, "litellm_provider": "gemini", "mode": "chat", "rpm": 10000, @@ -8315,7 +8914,7 @@ ], "source": "https://ai.google.dev/pricing#2_0flash", "supports_web_search": true, - "cache_read_input_token_cost": 2.5e-08, + "cache_read_input_token_cost": 2.5e-8, "supports_prompt_caching": true }, "gemini/gemini-2.5-pro-preview-tts": { @@ -8328,11 +8927,11 @@ "max_audio_length_hours": 8.4, "max_audio_per_prompt": 1, "max_pdf_size_mb": 30, - "input_cost_per_audio_token": 7e-07, - "input_cost_per_token": 1.25e-06, - "input_cost_per_token_above_200k_tokens": 2.5e-06, - "output_cost_per_token": 1e-05, - "output_cost_per_token_above_200k_tokens": 1.5e-05, + "input_cost_per_audio_token": 7e-7, + "input_cost_per_token": 0.00000125, + "input_cost_per_token_above_200k_tokens": 0.0000025, + "output_cost_per_token": 0.00001, + "output_cost_per_token_above_200k_tokens": 0.000015, "litellm_provider": "gemini", "mode": "chat", "rpm": 10000, @@ -8351,7 +8950,7 @@ ], "source": "https://ai.google.dev/gemini-api/docs/pricing#gemini-2.5-pro-preview", "supports_web_search": true, - "cache_read_input_token_cost": 3.125e-07, + "cache_read_input_token_cost": 3.125e-7, "supports_prompt_caching": true }, "gemini/gemini-2.5-pro-preview-06-05": { @@ -8364,11 +8963,11 @@ "max_audio_length_hours": 8.4, "max_audio_per_prompt": 1, "max_pdf_size_mb": 30, - 
"input_cost_per_audio_token": 7e-07, - "input_cost_per_token": 1.25e-06, - "input_cost_per_token_above_200k_tokens": 2.5e-06, - "output_cost_per_token": 1e-05, - "output_cost_per_token_above_200k_tokens": 1.5e-05, + "input_cost_per_audio_token": 7e-7, + "input_cost_per_token": 0.00000125, + "input_cost_per_token_above_200k_tokens": 0.0000025, + "output_cost_per_token": 0.00001, + "output_cost_per_token_above_200k_tokens": 0.000015, "litellm_provider": "gemini", "mode": "chat", "rpm": 10000, @@ -8392,7 +8991,7 @@ "supports_web_search": true, "supports_url_context": true, "supports_pdf_input": true, - "cache_read_input_token_cost": 3.125e-07, + "cache_read_input_token_cost": 3.125e-7, "supports_prompt_caching": true }, "gemini/gemini-2.5-pro-preview-05-06": { @@ -8405,11 +9004,11 @@ "max_audio_length_hours": 8.4, "max_audio_per_prompt": 1, "max_pdf_size_mb": 30, - "input_cost_per_audio_token": 7e-07, - "input_cost_per_token": 1.25e-06, - "input_cost_per_token_above_200k_tokens": 2.5e-06, - "output_cost_per_token": 1e-05, - "output_cost_per_token_above_200k_tokens": 1.5e-05, + "input_cost_per_audio_token": 7e-7, + "input_cost_per_token": 0.00000125, + "input_cost_per_token_above_200k_tokens": 0.0000025, + "output_cost_per_token": 0.00001, + "output_cost_per_token_above_200k_tokens": 0.000015, "litellm_provider": "gemini", "mode": "chat", "rpm": 10000, @@ -8433,7 +9032,7 @@ "supports_web_search": true, "supports_url_context": true, "supports_pdf_input": true, - "cache_read_input_token_cost": 3.125e-07, + "cache_read_input_token_cost": 3.125e-7, "supports_prompt_caching": true }, "gemini/gemini-2.5-pro-preview-03-25": { @@ -8446,11 +9045,11 @@ "max_audio_length_hours": 8.4, "max_audio_per_prompt": 1, "max_pdf_size_mb": 30, - "input_cost_per_audio_token": 7e-07, - "input_cost_per_token": 1.25e-06, - "input_cost_per_token_above_200k_tokens": 2.5e-06, - "output_cost_per_token": 1e-05, - "output_cost_per_token_above_200k_tokens": 1.5e-05, + "input_cost_per_audio_token": 
7e-7, + "input_cost_per_token": 0.00000125, + "input_cost_per_token_above_200k_tokens": 0.0000025, + "output_cost_per_token": 0.00001, + "output_cost_per_token_above_200k_tokens": 0.000015, "litellm_provider": "gemini", "mode": "chat", "rpm": 10000, @@ -8473,7 +9072,7 @@ "source": "https://ai.google.dev/gemini-api/docs/pricing#gemini-2.5-pro-preview", "supports_web_search": true, "supports_pdf_input": true, - "cache_read_input_token_cost": 3.125e-07, + "cache_read_input_token_cost": 3.125e-7, "supports_prompt_caching": true }, "gemini/gemini-2.0-flash-exp": { @@ -8522,7 +9121,7 @@ "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#gemini-2.0-flash", "supports_tool_choice": true, "supports_web_search": true, - "cache_read_input_token_cost": 0.0, + "cache_read_input_token_cost": 0, "supports_prompt_caching": true }, "gemini/gemini-2.0-flash-lite-preview-02-05": { @@ -8535,9 +9134,9 @@ "max_audio_length_hours": 8.4, "max_audio_per_prompt": 1, "max_pdf_size_mb": 30, - "input_cost_per_audio_token": 7.5e-08, - "input_cost_per_token": 7.5e-08, - "output_cost_per_token": 3e-07, + "input_cost_per_audio_token": 7.5e-8, + "input_cost_per_token": 7.5e-8, + "output_cost_per_token": 3e-7, "litellm_provider": "gemini", "mode": "chat", "rpm": 60000, @@ -8559,7 +9158,7 @@ ], "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#gemini-2.0-flash-lite", "supports_web_search": true, - "cache_read_input_token_cost": 1.875e-08, + "cache_read_input_token_cost": 1.875e-8, "supports_prompt_caching": true }, "gemini/gemini-2.0-flash-thinking-exp": { @@ -8608,7 +9207,7 @@ "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#gemini-2.0-flash", "supports_tool_choice": true, "supports_web_search": true, - "cache_read_input_token_cost": 0.0, + "cache_read_input_token_cost": 0, "supports_prompt_caching": true }, "gemini/gemini-2.0-flash-thinking-exp-01-21": { @@ -8657,7 +9256,7 @@ "source": 
"https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#gemini-2.0-flash", "supports_tool_choice": true, "supports_web_search": true, - "cache_read_input_token_cost": 0.0, + "cache_read_input_token_cost": 0, "supports_prompt_caching": true }, "gemini/gemma-3-27b-it": { @@ -8716,12 +9315,46 @@ "source": "https://aistudio.google.com", "supports_tool_choice": true }, + "vertex_ai/claude-opus-4-1": { + "max_tokens": 4096, + "max_input_tokens": 200000, + "max_output_tokens": 4096, + "input_cost_per_token": 0.000015, + "output_cost_per_token": 0.000075, + "input_cost_per_token_batches": 0.0000075, + "output_cost_per_token_batches": 0.0000375, + "cache_creation_input_token_cost": 0.00001875, + "cache_read_input_token_cost": 0.0000015, + "litellm_provider": "vertex_ai-anthropic_models", + "mode": "chat", + "supports_function_calling": true, + "supports_vision": true, + "supports_assistant_prefill": true, + "supports_tool_choice": true + }, + "vertex_ai/claude-opus-4-1@20250805": { + "max_tokens": 4096, + "max_input_tokens": 200000, + "max_output_tokens": 4096, + "input_cost_per_token": 0.000015, + "output_cost_per_token": 0.000075, + "input_cost_per_token_batches": 0.0000075, + "output_cost_per_token_batches": 0.0000375, + "cache_creation_input_token_cost": 0.00001875, + "cache_read_input_token_cost": 0.0000015, + "litellm_provider": "vertex_ai-anthropic_models", + "mode": "chat", + "supports_function_calling": true, + "supports_vision": true, + "supports_assistant_prefill": true, + "supports_tool_choice": true + }, "vertex_ai/claude-3-sonnet": { "max_tokens": 4096, "max_input_tokens": 200000, "max_output_tokens": 4096, - "input_cost_per_token": 3e-06, - "output_cost_per_token": 1.5e-05, + "input_cost_per_token": 0.000003, + "output_cost_per_token": 0.000015, "litellm_provider": "vertex_ai-anthropic_models", "mode": "chat", "supports_function_calling": true, @@ -8739,12 +9372,12 @@ "max_audio_length_hours": 8.4, "max_audio_per_prompt": 1, "max_pdf_size_mb": 30, 
- "input_cost_per_token": 5e-07, - "input_cost_per_audio_token": 3e-06, - "input_cost_per_image": 3e-06, - "input_cost_per_video_per_second": 3e-06, - "output_cost_per_token": 2e-06, - "output_cost_per_audio_token": 1.2e-05, + "input_cost_per_token": 5e-7, + "input_cost_per_audio_token": 0.000003, + "input_cost_per_image": 0.000003, + "input_cost_per_video_per_second": 0.000003, + "output_cost_per_token": 0.000002, + "output_cost_per_audio_token": 0.000012, "litellm_provider": "vertex_ai-language-models", "mode": "chat", "rpm": 10, @@ -8774,15 +9407,15 @@ "supports_web_search": true, "supports_url_context": true, "supports_pdf_input": true, - "cache_read_input_token_cost": 7.5e-08, + "cache_read_input_token_cost": 7.5e-8, "supports_prompt_caching": true }, "vertex_ai/claude-3-sonnet@20240229": { "max_tokens": 4096, "max_input_tokens": 200000, "max_output_tokens": 4096, - "input_cost_per_token": 3e-06, - "output_cost_per_token": 1.5e-05, + "input_cost_per_token": 0.000003, + "output_cost_per_token": 0.000015, "litellm_provider": "vertex_ai-anthropic_models", "mode": "chat", "supports_function_calling": true, @@ -8795,8 +9428,8 @@ "max_tokens": 8192, "max_input_tokens": 200000, "max_output_tokens": 8192, - "input_cost_per_token": 3e-06, - "output_cost_per_token": 1.5e-05, + "input_cost_per_token": 0.000003, + "output_cost_per_token": 0.000015, "litellm_provider": "vertex_ai-anthropic_models", "mode": "chat", "supports_function_calling": true, @@ -8809,8 +9442,8 @@ "max_tokens": 8192, "max_input_tokens": 200000, "max_output_tokens": 8192, - "input_cost_per_token": 3e-06, - "output_cost_per_token": 1.5e-05, + "input_cost_per_token": 0.000003, + "output_cost_per_token": 0.000015, "litellm_provider": "vertex_ai-anthropic_models", "mode": "chat", "supports_function_calling": true, @@ -8824,8 +9457,8 @@ "max_tokens": 8192, "max_input_tokens": 200000, "max_output_tokens": 8192, - "input_cost_per_token": 3e-06, - "output_cost_per_token": 1.5e-05, + "input_cost_per_token": 
0.000003, + "output_cost_per_token": 0.000015, "litellm_provider": "vertex_ai-anthropic_models", "mode": "chat", "supports_function_calling": true, @@ -8839,8 +9472,8 @@ "max_tokens": 8192, "max_input_tokens": 200000, "max_output_tokens": 8192, - "input_cost_per_token": 3e-06, - "output_cost_per_token": 1.5e-05, + "input_cost_per_token": 0.000003, + "output_cost_per_token": 0.000015, "litellm_provider": "vertex_ai-anthropic_models", "mode": "chat", "supports_function_calling": true, @@ -8854,10 +9487,10 @@ "max_tokens": 8192, "max_input_tokens": 200000, "max_output_tokens": 8192, - "input_cost_per_token": 3e-06, - "output_cost_per_token": 1.5e-05, - "cache_creation_input_token_cost": 3.75e-06, - "cache_read_input_token_cost": 3e-07, + "input_cost_per_token": 0.000003, + "output_cost_per_token": 0.000015, + "cache_creation_input_token_cost": 0.00000375, + "cache_read_input_token_cost": 3e-7, "litellm_provider": "vertex_ai-anthropic_models", "mode": "chat", "supports_function_calling": true, @@ -8875,15 +9508,15 @@ "max_tokens": 32000, "max_input_tokens": 200000, "max_output_tokens": 32000, - "input_cost_per_token": 1.5e-05, - "output_cost_per_token": 7.5e-05, + "input_cost_per_token": 0.000015, + "output_cost_per_token": 0.000075, "search_context_cost_per_query": { "search_context_size_low": 0.01, "search_context_size_medium": 0.01, "search_context_size_high": 0.01 }, - "cache_creation_input_token_cost": 1.875e-05, - "cache_read_input_token_cost": 1.5e-06, + "cache_creation_input_token_cost": 0.00001875, + "cache_read_input_token_cost": 0.0000015, "litellm_provider": "vertex_ai-anthropic_models", "mode": "chat", "supports_function_calling": true, @@ -8901,15 +9534,15 @@ "max_tokens": 32000, "max_input_tokens": 200000, "max_output_tokens": 32000, - "input_cost_per_token": 1.5e-05, - "output_cost_per_token": 7.5e-05, + "input_cost_per_token": 0.000015, + "output_cost_per_token": 0.000075, "search_context_cost_per_query": { "search_context_size_low": 0.01, 
"search_context_size_medium": 0.01, "search_context_size_high": 0.01 }, - "cache_creation_input_token_cost": 1.875e-05, - "cache_read_input_token_cost": 1.5e-06, + "cache_creation_input_token_cost": 0.00001875, + "cache_read_input_token_cost": 0.0000015, "litellm_provider": "vertex_ai-anthropic_models", "mode": "chat", "supports_function_calling": true, @@ -8927,15 +9560,15 @@ "max_tokens": 64000, "max_input_tokens": 200000, "max_output_tokens": 64000, - "input_cost_per_token": 3e-06, - "output_cost_per_token": 1.5e-05, + "input_cost_per_token": 0.000003, + "output_cost_per_token": 0.000015, "search_context_cost_per_query": { "search_context_size_low": 0.01, "search_context_size_medium": 0.01, "search_context_size_high": 0.01 }, - "cache_creation_input_token_cost": 3.75e-06, - "cache_read_input_token_cost": 3e-07, + "cache_creation_input_token_cost": 0.00000375, + "cache_read_input_token_cost": 3e-7, "litellm_provider": "vertex_ai-anthropic_models", "mode": "chat", "supports_function_calling": true, @@ -8953,15 +9586,15 @@ "max_tokens": 64000, "max_input_tokens": 200000, "max_output_tokens": 64000, - "input_cost_per_token": 3e-06, - "output_cost_per_token": 1.5e-05, + "input_cost_per_token": 0.000003, + "output_cost_per_token": 0.000015, "search_context_cost_per_query": { "search_context_size_low": 0.01, "search_context_size_medium": 0.01, "search_context_size_high": 0.01 }, - "cache_creation_input_token_cost": 3.75e-06, - "cache_read_input_token_cost": 3e-07, + "cache_creation_input_token_cost": 0.00000375, + "cache_read_input_token_cost": 3e-7, "litellm_provider": "vertex_ai-anthropic_models", "mode": "chat", "supports_function_calling": true, @@ -8979,8 +9612,8 @@ "max_tokens": 4096, "max_input_tokens": 200000, "max_output_tokens": 4096, - "input_cost_per_token": 2.5e-07, - "output_cost_per_token": 1.25e-06, + "input_cost_per_token": 2.5e-7, + "output_cost_per_token": 0.00000125, "litellm_provider": "vertex_ai-anthropic_models", "mode": "chat", 
"supports_function_calling": true, @@ -8992,8 +9625,8 @@ "max_tokens": 4096, "max_input_tokens": 200000, "max_output_tokens": 4096, - "input_cost_per_token": 2.5e-07, - "output_cost_per_token": 1.25e-06, + "input_cost_per_token": 2.5e-7, + "output_cost_per_token": 0.00000125, "litellm_provider": "vertex_ai-anthropic_models", "mode": "chat", "supports_function_calling": true, @@ -9005,8 +9638,8 @@ "max_tokens": 8192, "max_input_tokens": 200000, "max_output_tokens": 8192, - "input_cost_per_token": 1e-06, - "output_cost_per_token": 5e-06, + "input_cost_per_token": 0.000001, + "output_cost_per_token": 0.000005, "litellm_provider": "vertex_ai-anthropic_models", "mode": "chat", "supports_function_calling": true, @@ -9018,8 +9651,8 @@ "max_tokens": 8192, "max_input_tokens": 200000, "max_output_tokens": 8192, - "input_cost_per_token": 1e-06, - "output_cost_per_token": 5e-06, + "input_cost_per_token": 0.000001, + "output_cost_per_token": 0.000005, "litellm_provider": "vertex_ai-anthropic_models", "mode": "chat", "supports_function_calling": true, @@ -9031,8 +9664,8 @@ "max_tokens": 4096, "max_input_tokens": 200000, "max_output_tokens": 4096, - "input_cost_per_token": 1.5e-05, - "output_cost_per_token": 7.5e-05, + "input_cost_per_token": 0.000015, + "output_cost_per_token": 0.000075, "litellm_provider": "vertex_ai-anthropic_models", "mode": "chat", "supports_function_calling": true, @@ -9044,8 +9677,8 @@ "max_tokens": 4096, "max_input_tokens": 200000, "max_output_tokens": 4096, - "input_cost_per_token": 1.5e-05, - "output_cost_per_token": 7.5e-05, + "input_cost_per_token": 0.000015, + "output_cost_per_token": 0.000075, "litellm_provider": "vertex_ai-anthropic_models", "mode": "chat", "supports_function_calling": true, @@ -9057,8 +9690,8 @@ "max_tokens": 32000, "max_input_tokens": 32000, "max_output_tokens": 32000, - "input_cost_per_token": 0.0, - "output_cost_per_token": 0.0, + "input_cost_per_token": 0, + "output_cost_per_token": 0, "litellm_provider": 
"vertex_ai-llama_models", "mode": "chat", "source": "https://cloud.google.com/vertex-ai/generative-ai/pricing#partner-models", @@ -9068,8 +9701,8 @@ "max_tokens": 10000000, "max_input_tokens": 10000000, "max_output_tokens": 10000000, - "input_cost_per_token": 2.5e-07, - "output_cost_per_token": 7e-07, + "input_cost_per_token": 2.5e-7, + "output_cost_per_token": 7e-7, "litellm_provider": "vertex_ai-llama_models", "mode": "chat", "source": "https://cloud.google.com/vertex-ai/generative-ai/pricing#partner-models", @@ -9088,8 +9721,8 @@ "max_tokens": 10000000, "max_input_tokens": 10000000, "max_output_tokens": 10000000, - "input_cost_per_token": 2.5e-07, - "output_cost_per_token": 7e-07, + "input_cost_per_token": 2.5e-7, + "output_cost_per_token": 7e-7, "litellm_provider": "vertex_ai-llama_models", "mode": "chat", "source": "https://cloud.google.com/vertex-ai/generative-ai/pricing#partner-models", @@ -9108,8 +9741,8 @@ "max_tokens": 1000000, "max_input_tokens": 1000000, "max_output_tokens": 1000000, - "input_cost_per_token": 3.5e-07, - "output_cost_per_token": 1.15e-06, + "input_cost_per_token": 3.5e-7, + "output_cost_per_token": 0.00000115, "litellm_provider": "vertex_ai-llama_models", "mode": "chat", "source": "https://cloud.google.com/vertex-ai/generative-ai/pricing#partner-models", @@ -9128,8 +9761,8 @@ "max_tokens": 1000000, "max_input_tokens": 1000000, "max_output_tokens": 1000000, - "input_cost_per_token": 3.5e-07, - "output_cost_per_token": 1.15e-06, + "input_cost_per_token": 3.5e-7, + "output_cost_per_token": 0.00000115, "litellm_provider": "vertex_ai-llama_models", "mode": "chat", "source": "https://cloud.google.com/vertex-ai/generative-ai/pricing#partner-models", @@ -9148,8 +9781,8 @@ "max_tokens": 32000, "max_input_tokens": 32000, "max_output_tokens": 32000, - "input_cost_per_token": 0.0, - "output_cost_per_token": 0.0, + "input_cost_per_token": 0, + "output_cost_per_token": 0, "litellm_provider": "vertex_ai-llama_models", "mode": "chat", "source": 
"https://cloud.google.com/vertex-ai/generative-ai/pricing#partner-models", @@ -9159,8 +9792,8 @@ "max_tokens": 32000, "max_input_tokens": 32000, "max_output_tokens": 32000, - "input_cost_per_token": 0.0, - "output_cost_per_token": 0.0, + "input_cost_per_token": 0, + "output_cost_per_token": 0, "litellm_provider": "vertex_ai-llama_models", "mode": "chat", "source": "https://cloud.google.com/vertex-ai/generative-ai/pricing#partner-models", @@ -9170,8 +9803,8 @@ "max_tokens": 128000, "max_input_tokens": 128000, "max_output_tokens": 2048, - "input_cost_per_token": 0.0, - "output_cost_per_token": 0.0, + "input_cost_per_token": 0, + "output_cost_per_token": 0, "litellm_provider": "vertex_ai-llama_models", "mode": "chat", "supports_system_messages": true, @@ -9186,8 +9819,8 @@ "max_tokens": 128000, "max_input_tokens": 128000, "max_output_tokens": 2048, - "input_cost_per_token": 0.0, - "output_cost_per_token": 0.0, + "input_cost_per_token": 0, + "output_cost_per_token": 0, "litellm_provider": "vertex_ai-llama_models", "mode": "chat", "supports_system_messages": true, @@ -9199,8 +9832,8 @@ "max_tokens": 128000, "max_input_tokens": 128000, "max_output_tokens": 2048, - "input_cost_per_token": 5e-06, - "output_cost_per_token": 1.6e-05, + "input_cost_per_token": 0.000005, + "output_cost_per_token": 0.000016, "litellm_provider": "vertex_ai-llama_models", "mode": "chat", "supports_system_messages": true, @@ -9212,8 +9845,8 @@ "max_tokens": 128000, "max_input_tokens": 128000, "max_output_tokens": 2048, - "input_cost_per_token": 0.0, - "output_cost_per_token": 0.0, + "input_cost_per_token": 0, + "output_cost_per_token": 0, "litellm_provider": "vertex_ai-llama_models", "mode": "chat", "supports_system_messages": true, @@ -9228,8 +9861,8 @@ "max_tokens": 8191, "max_input_tokens": 128000, "max_output_tokens": 8191, - "input_cost_per_token": 2e-06, - "output_cost_per_token": 6e-06, + "input_cost_per_token": 0.000002, + "output_cost_per_token": 0.000006, "litellm_provider": 
"vertex_ai-mistral_models", "mode": "chat", "supports_function_calling": true, @@ -9239,8 +9872,8 @@ "max_tokens": 8191, "max_input_tokens": 128000, "max_output_tokens": 8191, - "input_cost_per_token": 2e-06, - "output_cost_per_token": 6e-06, + "input_cost_per_token": 0.000002, + "output_cost_per_token": 0.000006, "litellm_provider": "vertex_ai-mistral_models", "mode": "chat", "supports_function_calling": true, @@ -9250,8 +9883,8 @@ "max_tokens": 8191, "max_input_tokens": 128000, "max_output_tokens": 8191, - "input_cost_per_token": 2e-06, - "output_cost_per_token": 6e-06, + "input_cost_per_token": 0.000002, + "output_cost_per_token": 0.000006, "litellm_provider": "vertex_ai-mistral_models", "mode": "chat", "supports_function_calling": true, @@ -9261,8 +9894,8 @@ "max_tokens": 8191, "max_input_tokens": 128000, "max_output_tokens": 8191, - "input_cost_per_token": 2e-06, - "output_cost_per_token": 6e-06, + "input_cost_per_token": 0.000002, + "output_cost_per_token": 0.000006, "litellm_provider": "vertex_ai-mistral_models", "mode": "chat", "supports_function_calling": true, @@ -9272,8 +9905,8 @@ "max_tokens": 128000, "max_input_tokens": 128000, "max_output_tokens": 128000, - "input_cost_per_token": 1.5e-07, - "output_cost_per_token": 1.5e-07, + "input_cost_per_token": 1.5e-7, + "output_cost_per_token": 1.5e-7, "litellm_provider": "vertex_ai-mistral_models", "mode": "chat", "supports_function_calling": true, @@ -9283,8 +9916,8 @@ "max_tokens": 8191, "max_input_tokens": 32000, "max_output_tokens": 8191, - "input_cost_per_token": 1e-06, - "output_cost_per_token": 3e-06, + "input_cost_per_token": 0.000001, + "output_cost_per_token": 0.000003, "litellm_provider": "vertex_ai-mistral_models", "supports_function_calling": true, "mode": "chat", @@ -9294,8 +9927,8 @@ "max_tokens": 128000, "max_input_tokens": 128000, "max_output_tokens": 128000, - "input_cost_per_token": 1e-06, - "output_cost_per_token": 3e-06, + "input_cost_per_token": 0.000001, + "output_cost_per_token": 
0.000003, "litellm_provider": "vertex_ai-mistral_models", "mode": "chat", "supports_function_calling": true, @@ -9306,8 +9939,8 @@ "max_tokens": 256000, "max_input_tokens": 256000, "max_output_tokens": 256000, - "input_cost_per_token": 2e-07, - "output_cost_per_token": 4e-07, + "input_cost_per_token": 2e-7, + "output_cost_per_token": 4e-7, "litellm_provider": "vertex_ai-ai21_models", "mode": "chat", "supports_tool_choice": true @@ -9316,8 +9949,8 @@ "max_tokens": 256000, "max_input_tokens": 256000, "max_output_tokens": 256000, - "input_cost_per_token": 2e-06, - "output_cost_per_token": 8e-06, + "input_cost_per_token": 0.000002, + "output_cost_per_token": 0.000008, "litellm_provider": "vertex_ai-ai21_models", "mode": "chat", "supports_tool_choice": true @@ -9326,8 +9959,8 @@ "max_tokens": 256000, "max_input_tokens": 256000, "max_output_tokens": 256000, - "input_cost_per_token": 2e-07, - "output_cost_per_token": 4e-07, + "input_cost_per_token": 2e-7, + "output_cost_per_token": 4e-7, "litellm_provider": "vertex_ai-ai21_models", "mode": "chat", "supports_tool_choice": true @@ -9336,8 +9969,8 @@ "max_tokens": 256000, "max_input_tokens": 256000, "max_output_tokens": 256000, - "input_cost_per_token": 2e-07, - "output_cost_per_token": 4e-07, + "input_cost_per_token": 2e-7, + "output_cost_per_token": 4e-7, "litellm_provider": "vertex_ai-ai21_models", "mode": "chat", "supports_tool_choice": true @@ -9346,8 +9979,8 @@ "max_tokens": 256000, "max_input_tokens": 256000, "max_output_tokens": 256000, - "input_cost_per_token": 2e-06, - "output_cost_per_token": 8e-06, + "input_cost_per_token": 0.000002, + "output_cost_per_token": 0.000008, "litellm_provider": "vertex_ai-ai21_models", "mode": "chat", "supports_tool_choice": true @@ -9356,8 +9989,8 @@ "max_tokens": 128000, "max_input_tokens": 128000, "max_output_tokens": 128000, - "input_cost_per_token": 3e-06, - "output_cost_per_token": 3e-06, + "input_cost_per_token": 0.000003, + "output_cost_per_token": 0.000003, 
"litellm_provider": "vertex_ai-mistral_models", "mode": "chat", "supports_function_calling": true, @@ -9367,8 +10000,8 @@ "max_tokens": 128000, "max_input_tokens": 128000, "max_output_tokens": 128000, - "input_cost_per_token": 2e-07, - "output_cost_per_token": 6e-07, + "input_cost_per_token": 2e-7, + "output_cost_per_token": 6e-7, "litellm_provider": "vertex_ai-mistral_models", "mode": "chat", "supports_function_calling": true, @@ -9378,8 +10011,8 @@ "max_tokens": 128000, "max_input_tokens": 128000, "max_output_tokens": 128000, - "input_cost_per_token": 2e-07, - "output_cost_per_token": 6e-07, + "input_cost_per_token": 2e-7, + "output_cost_per_token": 6e-7, "litellm_provider": "vertex_ai-mistral_models", "mode": "chat", "supports_function_calling": true, @@ -9389,8 +10022,8 @@ "max_tokens": 128000, "max_input_tokens": 128000, "max_output_tokens": 128000, - "input_cost_per_token": 2e-07, - "output_cost_per_token": 6e-07, + "input_cost_per_token": 2e-7, + "output_cost_per_token": 6e-7, "litellm_provider": "vertex_ai-mistral_models", "mode": "chat", "supports_function_calling": true, @@ -9442,8 +10075,8 @@ "max_tokens": 2048, "max_input_tokens": 2048, "output_vector_size": 768, - "input_cost_per_character": 2.5e-08, - "input_cost_per_token": 1e-07, + "input_cost_per_character": 2.5e-8, + "input_cost_per_token": 1e-7, "output_cost_per_token": 0, "litellm_provider": "vertex_ai-embedding-models", "mode": "embedding", @@ -9453,7 +10086,7 @@ "max_tokens": 2048, "max_input_tokens": 2048, "output_vector_size": 3072, - "input_cost_per_token": 1.5e-07, + "input_cost_per_token": 1.5e-7, "output_cost_per_token": 0, "litellm_provider": "vertex_ai-embedding-models", "mode": "embedding", @@ -9463,8 +10096,8 @@ "max_tokens": 2048, "max_input_tokens": 2048, "output_vector_size": 768, - "input_cost_per_character": 2.5e-08, - "input_cost_per_token": 1e-07, + "input_cost_per_character": 2.5e-8, + "input_cost_per_token": 1e-7, "output_cost_per_token": 0, "litellm_provider": 
"vertex_ai-embedding-models", "mode": "embedding", @@ -9474,8 +10107,8 @@ "max_tokens": 2048, "max_input_tokens": 2048, "output_vector_size": 768, - "input_cost_per_character": 2.5e-08, - "input_cost_per_token": 1e-07, + "input_cost_per_character": 2.5e-8, + "input_cost_per_token": 1e-7, "output_cost_per_token": 0, "litellm_provider": "vertex_ai-embedding-models", "mode": "embedding", @@ -9485,12 +10118,12 @@ "max_tokens": 2048, "max_input_tokens": 2048, "output_vector_size": 768, - "input_cost_per_character": 2e-07, + "input_cost_per_character": 2e-7, "input_cost_per_image": 0.0001, "input_cost_per_video_per_second": 0.0005, "input_cost_per_video_per_second_above_8s_interval": 0.001, "input_cost_per_video_per_second_above_15s_interval": 0.002, - "input_cost_per_token": 8e-07, + "input_cost_per_token": 8e-7, "output_cost_per_token": 0, "litellm_provider": "vertex_ai-embedding-models", "mode": "embedding", @@ -9508,12 +10141,12 @@ "max_tokens": 2048, "max_input_tokens": 2048, "output_vector_size": 768, - "input_cost_per_character": 2e-07, + "input_cost_per_character": 2e-7, "input_cost_per_image": 0.0001, "input_cost_per_video_per_second": 0.0005, "input_cost_per_video_per_second_above_8s_interval": 0.001, "input_cost_per_video_per_second_above_15s_interval": 0.002, - "input_cost_per_token": 8e-07, + "input_cost_per_token": 8e-7, "output_cost_per_token": 0, "litellm_provider": "vertex_ai-embedding-models", "mode": "embedding", @@ -9531,8 +10164,8 @@ "max_tokens": 8192, "max_input_tokens": 8192, "output_vector_size": 3072, - "input_cost_per_character": 2.5e-08, - "input_cost_per_token": 1e-07, + "input_cost_per_character": 2.5e-8, + "input_cost_per_token": 1e-7, "output_cost_per_token": 0, "litellm_provider": "vertex_ai-embedding-models", "mode": "embedding", @@ -9542,8 +10175,8 @@ "max_tokens": 3072, "max_input_tokens": 3072, "output_vector_size": 768, - "input_cost_per_character": 2.5e-08, - "input_cost_per_token": 1e-07, + "input_cost_per_character": 2.5e-8, + 
"input_cost_per_token": 1e-7, "output_cost_per_token": 0, "litellm_provider": "vertex_ai-embedding-models", "mode": "embedding", @@ -9553,8 +10186,8 @@ "max_tokens": 3072, "max_input_tokens": 3072, "output_vector_size": 768, - "input_cost_per_character": 2.5e-08, - "input_cost_per_token": 1e-07, + "input_cost_per_character": 2.5e-8, + "input_cost_per_token": 1e-7, "output_cost_per_token": 0, "litellm_provider": "vertex_ai-embedding-models", "mode": "embedding", @@ -9564,8 +10197,8 @@ "max_tokens": 3072, "max_input_tokens": 3072, "output_vector_size": 768, - "input_cost_per_character": 2.5e-08, - "input_cost_per_token": 1e-07, + "input_cost_per_character": 2.5e-8, + "input_cost_per_token": 1e-7, "output_cost_per_token": 0, "litellm_provider": "vertex_ai-embedding-models", "mode": "embedding", @@ -9575,8 +10208,8 @@ "max_tokens": 3072, "max_input_tokens": 3072, "output_vector_size": 768, - "input_cost_per_character": 2.5e-08, - "input_cost_per_token": 1e-07, + "input_cost_per_character": 2.5e-8, + "input_cost_per_token": 1e-7, "output_cost_per_token": 0, "litellm_provider": "vertex_ai-embedding-models", "mode": "embedding", @@ -9586,8 +10219,8 @@ "max_tokens": 3072, "max_input_tokens": 3072, "output_vector_size": 768, - "input_cost_per_character": 2.5e-08, - "input_cost_per_token": 1e-07, + "input_cost_per_character": 2.5e-8, + "input_cost_per_token": 1e-7, "output_cost_per_token": 0, "litellm_provider": "vertex_ai-embedding-models", "mode": "embedding", @@ -9597,8 +10230,8 @@ "max_tokens": 3072, "max_input_tokens": 3072, "output_vector_size": 768, - "input_cost_per_token": 6.25e-09, - "input_cost_per_token_batch_requests": 5e-09, + "input_cost_per_token": 6.25e-9, + "input_cost_per_token_batch_requests": 5e-9, "output_cost_per_token": 0, "litellm_provider": "vertex_ai-embedding-models", "mode": "embedding", @@ -9608,7 +10241,7 @@ "max_tokens": 3072, "max_input_tokens": 3072, "output_vector_size": 768, - "input_cost_per_token": 6.25e-09, + "input_cost_per_token": 
6.25e-9, "output_cost_per_token": 0, "litellm_provider": "vertex_ai-embedding-models", "mode": "embedding", @@ -9618,8 +10251,8 @@ "max_tokens": 4096, "max_input_tokens": 8192, "max_output_tokens": 4096, - "input_cost_per_token": 1.25e-07, - "output_cost_per_token": 1.25e-07, + "input_cost_per_token": 1.25e-7, + "output_cost_per_token": 1.25e-7, "litellm_provider": "palm", "mode": "chat", "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models" @@ -9628,8 +10261,8 @@ "max_tokens": 4096, "max_input_tokens": 8192, "max_output_tokens": 4096, - "input_cost_per_token": 1.25e-07, - "output_cost_per_token": 1.25e-07, + "input_cost_per_token": 1.25e-7, + "output_cost_per_token": 1.25e-7, "litellm_provider": "palm", "mode": "chat", "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models" @@ -9638,8 +10271,8 @@ "max_tokens": 1024, "max_input_tokens": 8192, "max_output_tokens": 1024, - "input_cost_per_token": 1.25e-07, - "output_cost_per_token": 1.25e-07, + "input_cost_per_token": 1.25e-7, + "output_cost_per_token": 1.25e-7, "litellm_provider": "palm", "mode": "completion", "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models" @@ -9648,8 +10281,8 @@ "max_tokens": 1024, "max_input_tokens": 8192, "max_output_tokens": 1024, - "input_cost_per_token": 1.25e-07, - "output_cost_per_token": 1.25e-07, + "input_cost_per_token": 1.25e-7, + "output_cost_per_token": 1.25e-7, "litellm_provider": "palm", "mode": "completion", "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models" @@ -9658,8 +10291,8 @@ "max_tokens": 1024, "max_input_tokens": 8192, "max_output_tokens": 1024, - "input_cost_per_token": 1.25e-07, - "output_cost_per_token": 1.25e-07, + "input_cost_per_token": 1.25e-7, + "output_cost_per_token": 1.25e-7, "litellm_provider": "palm", "mode": "completion", "source": 
"https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models" @@ -9668,8 +10301,8 @@ "max_tokens": 1024, "max_input_tokens": 8192, "max_output_tokens": 1024, - "input_cost_per_token": 1.25e-07, - "output_cost_per_token": 1.25e-07, + "input_cost_per_token": 1.25e-7, + "output_cost_per_token": 1.25e-7, "litellm_provider": "palm", "mode": "completion", "source": "https://cloud.google.com/vertex-ai/generative-ai/docs/learn/models#foundation_models" @@ -9684,12 +10317,12 @@ "max_audio_length_hours": 8.4, "max_audio_per_prompt": 1, "max_pdf_size_mb": 30, - "cache_read_input_token_cost": 1.875e-08, - "cache_creation_input_token_cost": 1e-06, - "input_cost_per_token": 7.5e-08, - "input_cost_per_token_above_128k_tokens": 1.5e-07, - "output_cost_per_token": 3e-07, - "output_cost_per_token_above_128k_tokens": 6e-07, + "cache_read_input_token_cost": 1.875e-8, + "cache_creation_input_token_cost": 0.000001, + "input_cost_per_token": 7.5e-8, + "input_cost_per_token_above_128k_tokens": 1.5e-7, + "output_cost_per_token": 3e-7, + "output_cost_per_token_above_128k_tokens": 6e-7, "litellm_provider": "gemini", "mode": "chat", "supports_system_messages": true, @@ -9713,12 +10346,12 @@ "max_audio_length_hours": 8.4, "max_audio_per_prompt": 1, "max_pdf_size_mb": 30, - "cache_read_input_token_cost": 1.875e-08, - "cache_creation_input_token_cost": 1e-06, - "input_cost_per_token": 7.5e-08, - "input_cost_per_token_above_128k_tokens": 1.5e-07, - "output_cost_per_token": 3e-07, - "output_cost_per_token_above_128k_tokens": 6e-07, + "cache_read_input_token_cost": 1.875e-8, + "cache_creation_input_token_cost": 0.000001, + "input_cost_per_token": 7.5e-8, + "input_cost_per_token_above_128k_tokens": 1.5e-7, + "output_cost_per_token": 3e-7, + "output_cost_per_token_above_128k_tokens": 6e-7, "litellm_provider": "gemini", "mode": "chat", "supports_system_messages": true, @@ -9742,10 +10375,10 @@ "max_audio_length_hours": 8.4, "max_audio_per_prompt": 1, "max_pdf_size_mb": 30, - 
"input_cost_per_token": 7.5e-08, - "input_cost_per_token_above_128k_tokens": 1.5e-07, - "output_cost_per_token": 3e-07, - "output_cost_per_token_above_128k_tokens": 6e-07, + "input_cost_per_token": 7.5e-8, + "input_cost_per_token_above_128k_tokens": 1.5e-7, + "output_cost_per_token": 3e-7, + "output_cost_per_token_above_128k_tokens": 6e-7, "litellm_provider": "gemini", "mode": "chat", "supports_system_messages": true, @@ -9767,10 +10400,10 @@ "max_audio_length_hours": 8.4, "max_audio_per_prompt": 1, "max_pdf_size_mb": 30, - "input_cost_per_token": 7.5e-08, - "input_cost_per_token_above_128k_tokens": 1.5e-07, - "output_cost_per_token": 3e-07, - "output_cost_per_token_above_128k_tokens": 6e-07, + "input_cost_per_token": 7.5e-8, + "input_cost_per_token_above_128k_tokens": 1.5e-7, + "output_cost_per_token": 3e-7, + "output_cost_per_token_above_128k_tokens": 6e-7, "litellm_provider": "gemini", "mode": "chat", "supports_system_messages": true, @@ -9947,10 +10580,10 @@ "max_tokens": 8192, "max_input_tokens": 32760, "max_output_tokens": 8192, - "input_cost_per_token": 3.5e-07, - "input_cost_per_token_above_128k_tokens": 7e-07, - "output_cost_per_token": 1.05e-06, - "output_cost_per_token_above_128k_tokens": 2.1e-06, + "input_cost_per_token": 3.5e-7, + "input_cost_per_token_above_128k_tokens": 7e-7, + "output_cost_per_token": 0.00000105, + "output_cost_per_token_above_128k_tokens": 0.0000021, "litellm_provider": "gemini", "mode": "chat", "supports_function_calling": true, @@ -9964,10 +10597,10 @@ "max_tokens": 8192, "max_input_tokens": 2097152, "max_output_tokens": 8192, - "input_cost_per_token": 3.5e-06, - "input_cost_per_token_above_128k_tokens": 7e-06, - "output_cost_per_token": 1.05e-05, - "output_cost_per_token_above_128k_tokens": 2.1e-05, + "input_cost_per_token": 0.0000035, + "input_cost_per_token_above_128k_tokens": 0.000007, + "output_cost_per_token": 0.0000105, + "output_cost_per_token_above_128k_tokens": 0.000021, "litellm_provider": "gemini", "mode": "chat", 
"supports_system_messages": true, @@ -9983,10 +10616,10 @@ "max_tokens": 8192, "max_input_tokens": 2097152, "max_output_tokens": 8192, - "input_cost_per_token": 3.5e-06, - "input_cost_per_token_above_128k_tokens": 7e-06, - "output_cost_per_token": 1.05e-05, - "output_cost_per_token_above_128k_tokens": 2.1e-05, + "input_cost_per_token": 0.0000035, + "input_cost_per_token_above_128k_tokens": 0.000007, + "output_cost_per_token": 0.0000105, + "output_cost_per_token_above_128k_tokens": 0.000021, "litellm_provider": "gemini", "mode": "chat", "supports_system_messages": true, @@ -10004,10 +10637,10 @@ "max_tokens": 8192, "max_input_tokens": 2097152, "max_output_tokens": 8192, - "input_cost_per_token": 3.5e-06, - "input_cost_per_token_above_128k_tokens": 7e-06, - "output_cost_per_token": 1.05e-05, - "output_cost_per_token_above_128k_tokens": 2.1e-05, + "input_cost_per_token": 0.0000035, + "input_cost_per_token_above_128k_tokens": 0.000007, + "output_cost_per_token": 0.0000105, + "output_cost_per_token_above_128k_tokens": 0.000021, "litellm_provider": "gemini", "mode": "chat", "supports_system_messages": true, @@ -10025,10 +10658,10 @@ "max_tokens": 8192, "max_input_tokens": 2097152, "max_output_tokens": 8192, - "input_cost_per_token": 3.5e-06, - "input_cost_per_token_above_128k_tokens": 7e-06, - "output_cost_per_token": 1.05e-05, - "output_cost_per_token_above_128k_tokens": 2.1e-05, + "input_cost_per_token": 0.0000035, + "input_cost_per_token_above_128k_tokens": 0.000007, + "output_cost_per_token": 0.0000105, + "output_cost_per_token_above_128k_tokens": 0.000021, "litellm_provider": "gemini", "mode": "chat", "supports_system_messages": true, @@ -10063,10 +10696,10 @@ "max_tokens": 8192, "max_input_tokens": 1048576, "max_output_tokens": 8192, - "input_cost_per_token": 3.5e-06, - "input_cost_per_token_above_128k_tokens": 7e-06, - "output_cost_per_token": 1.05e-06, - "output_cost_per_token_above_128k_tokens": 2.1e-05, + "input_cost_per_token": 0.0000035, + 
"input_cost_per_token_above_128k_tokens": 0.000007, + "output_cost_per_token": 0.00000105, + "output_cost_per_token_above_128k_tokens": 0.000021, "litellm_provider": "gemini", "mode": "chat", "supports_system_messages": true, @@ -10082,10 +10715,10 @@ "max_tokens": 2048, "max_input_tokens": 30720, "max_output_tokens": 2048, - "input_cost_per_token": 3.5e-07, - "input_cost_per_token_above_128k_tokens": 7e-07, - "output_cost_per_token": 1.05e-06, - "output_cost_per_token_above_128k_tokens": 2.1e-06, + "input_cost_per_token": 3.5e-7, + "input_cost_per_token_above_128k_tokens": 7e-7, + "output_cost_per_token": 0.00000105, + "output_cost_per_token_above_128k_tokens": 0.0000021, "litellm_provider": "gemini", "mode": "chat", "supports_function_calling": true, @@ -10099,8 +10732,8 @@ "gemini/gemini-gemma-2-27b-it": { "max_tokens": 8192, "max_output_tokens": 8192, - "input_cost_per_token": 3.5e-07, - "output_cost_per_token": 1.05e-06, + "input_cost_per_token": 3.5e-7, + "output_cost_per_token": 0.00000105, "litellm_provider": "gemini", "mode": "chat", "supports_function_calling": true, @@ -10111,8 +10744,8 @@ "gemini/gemini-gemma-2-9b-it": { "max_tokens": 8192, "max_output_tokens": 8192, - "input_cost_per_token": 3.5e-07, - "output_cost_per_token": 1.05e-06, + "input_cost_per_token": 3.5e-7, + "output_cost_per_token": 0.00000105, "litellm_provider": "gemini", "mode": "chat", "supports_function_calling": true, @@ -10160,8 +10793,8 @@ "max_tokens": 8000, "max_input_tokens": 256000, "max_output_tokens": 8000, - "input_cost_per_token": 2.5e-06, - "output_cost_per_token": 1e-05, + "input_cost_per_token": 0.0000025, + "output_cost_per_token": 0.00001, "litellm_provider": "cohere_chat", "mode": "chat", "supports_function_calling": true, @@ -10171,8 +10804,8 @@ "max_tokens": 4096, "max_input_tokens": 128000, "max_output_tokens": 4096, - "input_cost_per_token": 1.5e-07, - "output_cost_per_token": 6e-07, + "input_cost_per_token": 1.5e-7, + "output_cost_per_token": 6e-7, 
"litellm_provider": "cohere_chat", "mode": "chat", "supports_function_calling": true, @@ -10182,8 +10815,8 @@ "max_tokens": 4096, "max_input_tokens": 128000, "max_output_tokens": 4096, - "input_cost_per_token": 1.5e-07, - "output_cost_per_token": 6e-07, + "input_cost_per_token": 1.5e-7, + "output_cost_per_token": 6e-7, "litellm_provider": "cohere_chat", "mode": "chat", "supports_function_calling": true, @@ -10193,8 +10826,8 @@ "max_tokens": 4096, "max_input_tokens": 128000, "max_output_tokens": 4096, - "input_cost_per_token": 1.5e-07, - "output_cost_per_token": 3.75e-08, + "input_cost_per_token": 1.5e-7, + "output_cost_per_token": 3.75e-8, "litellm_provider": "cohere_chat", "mode": "chat", "supports_function_calling": true, @@ -10205,8 +10838,8 @@ "max_tokens": 4096, "max_input_tokens": 4096, "max_output_tokens": 4096, - "input_cost_per_token": 3e-07, - "output_cost_per_token": 6e-07, + "input_cost_per_token": 3e-7, + "output_cost_per_token": 6e-7, "litellm_provider": "cohere_chat", "mode": "chat", "supports_tool_choice": true @@ -10215,8 +10848,8 @@ "max_tokens": 4096, "max_input_tokens": 128000, "max_output_tokens": 4096, - "input_cost_per_token": 2.5e-06, - "output_cost_per_token": 1e-05, + "input_cost_per_token": 0.0000025, + "output_cost_per_token": 0.00001, "litellm_provider": "cohere_chat", "mode": "chat", "supports_function_calling": true, @@ -10226,8 +10859,8 @@ "max_tokens": 4096, "max_input_tokens": 128000, "max_output_tokens": 4096, - "input_cost_per_token": 2.5e-06, - "output_cost_per_token": 1e-05, + "input_cost_per_token": 0.0000025, + "output_cost_per_token": 0.00001, "litellm_provider": "cohere_chat", "mode": "chat", "supports_function_calling": true, @@ -10237,8 +10870,8 @@ "max_tokens": 4096, "max_input_tokens": 4096, "max_output_tokens": 4096, - "input_cost_per_token": 1e-06, - "output_cost_per_token": 2e-06, + "input_cost_per_token": 0.000001, + "output_cost_per_token": 0.000002, "litellm_provider": "cohere", "mode": "completion" }, @@ -10246,8 
+10879,8 @@ "max_tokens": 4096, "max_input_tokens": 4096, "max_output_tokens": 4096, - "input_cost_per_token": 1e-06, - "output_cost_per_token": 2e-06, + "input_cost_per_token": 0.000001, + "output_cost_per_token": 0.000002, "litellm_provider": "cohere", "mode": "completion" }, @@ -10256,9 +10889,9 @@ "max_input_tokens": 4096, "max_output_tokens": 4096, "max_query_tokens": 2048, - "input_cost_per_token": 0.0, + "input_cost_per_token": 0, "input_cost_per_query": 0.002, - "output_cost_per_token": 0.0, + "output_cost_per_token": 0, "litellm_provider": "cohere", "mode": "rerank" }, @@ -10267,9 +10900,9 @@ "max_input_tokens": 4096, "max_output_tokens": 4096, "max_query_tokens": 2048, - "input_cost_per_token": 0.0, + "input_cost_per_token": 0, "input_cost_per_query": 0.002, - "output_cost_per_token": 0.0, + "output_cost_per_token": 0, "litellm_provider": "cohere", "mode": "rerank" }, @@ -10278,9 +10911,9 @@ "max_input_tokens": 4096, "max_output_tokens": 4096, "max_query_tokens": 2048, - "input_cost_per_token": 0.0, + "input_cost_per_token": 0, "input_cost_per_query": 0.002, - "output_cost_per_token": 0.0, + "output_cost_per_token": 0, "litellm_provider": "cohere", "mode": "rerank" }, @@ -10289,9 +10922,9 @@ "max_input_tokens": 4096, "max_output_tokens": 4096, "max_query_tokens": 2048, - "input_cost_per_token": 0.0, + "input_cost_per_token": 0, "input_cost_per_query": 0.002, - "output_cost_per_token": 0.0, + "output_cost_per_token": 0, "litellm_provider": "cohere", "mode": "rerank" }, @@ -10300,25 +10933,25 @@ "max_input_tokens": 4096, "max_output_tokens": 4096, "max_query_tokens": 2048, - "input_cost_per_token": 0.0, + "input_cost_per_token": 0, "input_cost_per_query": 0.002, - "output_cost_per_token": 0.0, + "output_cost_per_token": 0, "litellm_provider": "cohere", "mode": "rerank" }, "embed-english-light-v3.0": { "max_tokens": 1024, "max_input_tokens": 1024, - "input_cost_per_token": 1e-07, - "output_cost_per_token": 0.0, + "input_cost_per_token": 1e-7, + 
"output_cost_per_token": 0, "litellm_provider": "cohere", "mode": "embedding" }, "embed-multilingual-v3.0": { "max_tokens": 1024, "max_input_tokens": 1024, - "input_cost_per_token": 1e-07, - "output_cost_per_token": 0.0, + "input_cost_per_token": 1e-7, + "output_cost_per_token": 0, "litellm_provider": "cohere", "supports_embedding_image_input": true, "mode": "embedding" @@ -10326,33 +10959,33 @@ "embed-english-v2.0": { "max_tokens": 4096, "max_input_tokens": 4096, - "input_cost_per_token": 1e-07, - "output_cost_per_token": 0.0, + "input_cost_per_token": 1e-7, + "output_cost_per_token": 0, "litellm_provider": "cohere", "mode": "embedding" }, "embed-english-light-v2.0": { "max_tokens": 1024, "max_input_tokens": 1024, - "input_cost_per_token": 1e-07, - "output_cost_per_token": 0.0, + "input_cost_per_token": 1e-7, + "output_cost_per_token": 0, "litellm_provider": "cohere", "mode": "embedding" }, "embed-multilingual-v2.0": { "max_tokens": 768, "max_input_tokens": 768, - "input_cost_per_token": 1e-07, - "output_cost_per_token": 0.0, + "input_cost_per_token": 1e-7, + "output_cost_per_token": 0, "litellm_provider": "cohere", "mode": "embedding" }, "embed-english-v3.0": { "max_tokens": 1024, "max_input_tokens": 1024, - "input_cost_per_token": 1e-07, + "input_cost_per_token": 1e-7, "input_cost_per_image": 0.0001, - "output_cost_per_token": 0.0, + "output_cost_per_token": 0, "litellm_provider": "cohere", "mode": "embedding", "supports_image_input": true, @@ -10365,8 +10998,8 @@ "max_tokens": 4096, "max_input_tokens": 4096, "max_output_tokens": 4096, - "input_cost_per_token": 1e-07, - "output_cost_per_token": 5e-07, + "input_cost_per_token": 1e-7, + "output_cost_per_token": 5e-7, "litellm_provider": "replicate", "mode": "chat", "supports_tool_choice": true @@ -10375,8 +11008,8 @@ "max_tokens": 4096, "max_input_tokens": 4096, "max_output_tokens": 4096, - "input_cost_per_token": 1e-07, - "output_cost_per_token": 5e-07, + "input_cost_per_token": 1e-7, + "output_cost_per_token": 
5e-7, "litellm_provider": "replicate", "mode": "chat", "supports_tool_choice": true @@ -10385,8 +11018,8 @@ "max_tokens": 4096, "max_input_tokens": 4096, "max_output_tokens": 4096, - "input_cost_per_token": 6.5e-07, - "output_cost_per_token": 2.75e-06, + "input_cost_per_token": 6.5e-7, + "output_cost_per_token": 0.00000275, "litellm_provider": "replicate", "mode": "chat", "supports_tool_choice": true @@ -10395,8 +11028,8 @@ "max_tokens": 4096, "max_input_tokens": 4096, "max_output_tokens": 4096, - "input_cost_per_token": 6.5e-07, - "output_cost_per_token": 2.75e-06, + "input_cost_per_token": 6.5e-7, + "output_cost_per_token": 0.00000275, "litellm_provider": "replicate", "mode": "chat", "supports_tool_choice": true @@ -10405,8 +11038,8 @@ "max_tokens": 4096, "max_input_tokens": 4096, "max_output_tokens": 4096, - "input_cost_per_token": 5e-08, - "output_cost_per_token": 2.5e-07, + "input_cost_per_token": 5e-8, + "output_cost_per_token": 2.5e-7, "litellm_provider": "replicate", "mode": "chat", "supports_tool_choice": true @@ -10415,8 +11048,8 @@ "max_tokens": 4096, "max_input_tokens": 4096, "max_output_tokens": 4096, - "input_cost_per_token": 5e-08, - "output_cost_per_token": 2.5e-07, + "input_cost_per_token": 5e-8, + "output_cost_per_token": 2.5e-7, "litellm_provider": "replicate", "mode": "chat", "supports_tool_choice": true @@ -10425,8 +11058,8 @@ "max_tokens": 8192, "max_input_tokens": 8192, "max_output_tokens": 8192, - "input_cost_per_token": 6.5e-07, - "output_cost_per_token": 2.75e-06, + "input_cost_per_token": 6.5e-7, + "output_cost_per_token": 0.00000275, "litellm_provider": "replicate", "mode": "chat", "supports_tool_choice": true @@ -10435,8 +11068,8 @@ "max_tokens": 8192, "max_input_tokens": 8192, "max_output_tokens": 8192, - "input_cost_per_token": 6.5e-07, - "output_cost_per_token": 2.75e-06, + "input_cost_per_token": 6.5e-7, + "output_cost_per_token": 0.00000275, "litellm_provider": "replicate", "mode": "chat", "supports_tool_choice": true @@ -10445,8 
+11078,8 @@ "max_tokens": 8086, "max_input_tokens": 8086, "max_output_tokens": 8086, - "input_cost_per_token": 5e-08, - "output_cost_per_token": 2.5e-07, + "input_cost_per_token": 5e-8, + "output_cost_per_token": 2.5e-7, "litellm_provider": "replicate", "mode": "chat", "supports_tool_choice": true @@ -10455,8 +11088,8 @@ "max_tokens": 8086, "max_input_tokens": 8086, "max_output_tokens": 8086, - "input_cost_per_token": 5e-08, - "output_cost_per_token": 2.5e-07, + "input_cost_per_token": 5e-8, + "output_cost_per_token": 2.5e-7, "litellm_provider": "replicate", "mode": "chat", "supports_tool_choice": true @@ -10465,8 +11098,8 @@ "max_tokens": 4096, "max_input_tokens": 4096, "max_output_tokens": 4096, - "input_cost_per_token": 5e-08, - "output_cost_per_token": 2.5e-07, + "input_cost_per_token": 5e-8, + "output_cost_per_token": 2.5e-7, "litellm_provider": "replicate", "mode": "chat", "supports_tool_choice": true @@ -10475,8 +11108,8 @@ "max_tokens": 4096, "max_input_tokens": 4096, "max_output_tokens": 4096, - "input_cost_per_token": 5e-08, - "output_cost_per_token": 2.5e-07, + "input_cost_per_token": 5e-8, + "output_cost_per_token": 2.5e-7, "litellm_provider": "replicate", "mode": "chat", "supports_tool_choice": true @@ -10485,8 +11118,8 @@ "max_tokens": 4096, "max_input_tokens": 4096, "max_output_tokens": 4096, - "input_cost_per_token": 3e-07, - "output_cost_per_token": 1e-06, + "input_cost_per_token": 3e-7, + "output_cost_per_token": 0.000001, "litellm_provider": "replicate", "mode": "chat", "supports_tool_choice": true @@ -10495,9 +11128,9 @@ "max_tokens": 8192, "max_input_tokens": 65336, "max_output_tokens": 8192, - "input_cost_per_token": 5e-07, - "input_cost_per_token_cache_hit": 1.4e-07, - "output_cost_per_token": 2.15e-06, + "input_cost_per_token": 5e-7, + "input_cost_per_token_cache_hit": 1.4e-7, + "output_cost_per_token": 0.00000215, "litellm_provider": "openrouter", "mode": "chat", "supports_function_calling": true, @@ -10510,8 +11143,8 @@ "max_tokens": 
256000, "max_input_tokens": 256000, "max_output_tokens": 256000, - "input_cost_per_token": 3e-06, - "output_cost_per_token": 1.5e-05, + "input_cost_per_token": 0.000003, + "output_cost_per_token": 0.000015, "litellm_provider": "openrouter", "mode": "chat", "supports_function_calling": true, @@ -10524,8 +11157,8 @@ "max_tokens": 2048, "max_input_tokens": 131072, "max_output_tokens": 2048, - "input_cost_per_token": 1e-07, - "output_cost_per_token": 2e-07, + "input_cost_per_token": 1e-7, + "output_cost_per_token": 2e-7, "litellm_provider": "openrouter", "mode": "chat", "source": "https://openrouter.ai/api/v1/models/bytedance/ui-tars-1.5-7b", @@ -10535,9 +11168,9 @@ "max_tokens": 8192, "max_input_tokens": 65336, "max_output_tokens": 8192, - "input_cost_per_token": 5.5e-07, - "input_cost_per_token_cache_hit": 1.4e-07, - "output_cost_per_token": 2.19e-06, + "input_cost_per_token": 5.5e-7, + "input_cost_per_token_cache_hit": 1.4e-7, + "output_cost_per_token": 0.00000219, "litellm_provider": "openrouter", "mode": "chat", "supports_function_calling": true, @@ -10550,8 +11183,8 @@ "max_tokens": 8192, "max_input_tokens": 65536, "max_output_tokens": 8192, - "input_cost_per_token": 1.4e-07, - "output_cost_per_token": 2.8e-07, + "input_cost_per_token": 1.4e-7, + "output_cost_per_token": 2.8e-7, "litellm_provider": "openrouter", "supports_prompt_caching": true, "mode": "chat", @@ -10561,8 +11194,8 @@ "max_tokens": 8192, "max_input_tokens": 66000, "max_output_tokens": 4096, - "input_cost_per_token": 1.4e-07, - "output_cost_per_token": 2.8e-07, + "input_cost_per_token": 1.4e-7, + "output_cost_per_token": 2.8e-7, "litellm_provider": "openrouter", "supports_prompt_caching": true, "mode": "chat", @@ -10570,8 +11203,8 @@ }, "openrouter/microsoft/wizardlm-2-8x22b:nitro": { "max_tokens": 65536, - "input_cost_per_token": 1e-06, - "output_cost_per_token": 1e-06, + "input_cost_per_token": 0.000001, + "output_cost_per_token": 0.000001, "litellm_provider": "openrouter", "mode": "chat", 
"supports_tool_choice": true @@ -10586,9 +11219,9 @@ "max_audio_length_hours": 8.4, "max_audio_per_prompt": 1, "max_pdf_size_mb": 30, - "input_cost_per_audio_token": 7e-07, - "input_cost_per_token": 1.25e-06, - "output_cost_per_token": 1e-05, + "input_cost_per_audio_token": 7e-7, + "input_cost_per_token": 0.00000125, + "output_cost_per_token": 0.00001, "litellm_provider": "openrouter", "mode": "chat", "supports_system_messages": true, @@ -10602,8 +11235,8 @@ "max_tokens": 8192, "max_input_tokens": 1000000, "max_output_tokens": 8192, - "input_cost_per_token": 2.5e-06, - "output_cost_per_token": 7.5e-06, + "input_cost_per_token": 0.0000025, + "output_cost_per_token": 0.0000075, "input_cost_per_image": 0.00265, "litellm_provider": "openrouter", "mode": "chat", @@ -10621,9 +11254,9 @@ "max_audio_length_hours": 8.4, "max_audio_per_prompt": 1, "max_pdf_size_mb": 30, - "input_cost_per_audio_token": 7e-07, - "input_cost_per_token": 1e-07, - "output_cost_per_token": 4e-07, + "input_cost_per_audio_token": 7e-7, + "input_cost_per_token": 1e-7, + "output_cost_per_token": 4e-7, "litellm_provider": "openrouter", "mode": "chat", "supports_system_messages": true, @@ -10643,9 +11276,9 @@ "max_audio_length_hours": 8.4, "max_audio_per_prompt": 1, "max_pdf_size_mb": 30, - "input_cost_per_audio_token": 7e-07, - "input_cost_per_token": 3e-07, - "output_cost_per_token": 2.5e-06, + "input_cost_per_audio_token": 7e-7, + "input_cost_per_token": 3e-7, + "output_cost_per_token": 0.0000025, "litellm_provider": "openrouter", "mode": "chat", "supports_system_messages": true, @@ -10657,32 +11290,32 @@ }, "openrouter/mistralai/mixtral-8x22b-instruct": { "max_tokens": 65536, - "input_cost_per_token": 6.5e-07, - "output_cost_per_token": 6.5e-07, + "input_cost_per_token": 6.5e-7, + "output_cost_per_token": 6.5e-7, "litellm_provider": "openrouter", "mode": "chat", "supports_tool_choice": true }, "openrouter/cohere/command-r-plus": { "max_tokens": 128000, - "input_cost_per_token": 3e-06, - 
"output_cost_per_token": 1.5e-05, + "input_cost_per_token": 0.000003, + "output_cost_per_token": 0.000015, "litellm_provider": "openrouter", "mode": "chat", "supports_tool_choice": true }, "openrouter/databricks/dbrx-instruct": { "max_tokens": 32768, - "input_cost_per_token": 6e-07, - "output_cost_per_token": 6e-07, + "input_cost_per_token": 6e-7, + "output_cost_per_token": 6e-7, "litellm_provider": "openrouter", "mode": "chat", "supports_tool_choice": true }, "openrouter/anthropic/claude-3-haiku": { "max_tokens": 200000, - "input_cost_per_token": 2.5e-07, - "output_cost_per_token": 1.25e-06, + "input_cost_per_token": 2.5e-7, + "output_cost_per_token": 0.00000125, "input_cost_per_image": 0.0004, "litellm_provider": "openrouter", "mode": "chat", @@ -10692,8 +11325,8 @@ }, "openrouter/anthropic/claude-3-5-haiku": { "max_tokens": 200000, - "input_cost_per_token": 1e-06, - "output_cost_per_token": 5e-06, + "input_cost_per_token": 0.000001, + "output_cost_per_token": 0.000005, "litellm_provider": "openrouter", "mode": "chat", "supports_function_calling": true, @@ -10703,8 +11336,8 @@ "max_tokens": 4096, "max_input_tokens": 200000, "max_output_tokens": 4096, - "input_cost_per_token": 2.5e-07, - "output_cost_per_token": 1.25e-06, + "input_cost_per_token": 2.5e-7, + "output_cost_per_token": 0.00000125, "litellm_provider": "openrouter", "mode": "chat", "supports_function_calling": true, @@ -10716,8 +11349,8 @@ "max_tokens": 8192, "max_input_tokens": 200000, "max_output_tokens": 8192, - "input_cost_per_token": 1e-06, - "output_cost_per_token": 5e-06, + "input_cost_per_token": 0.000001, + "output_cost_per_token": 0.000005, "litellm_provider": "openrouter", "mode": "chat", "supports_function_calling": true, @@ -10729,8 +11362,8 @@ "max_tokens": 8192, "max_input_tokens": 200000, "max_output_tokens": 8192, - "input_cost_per_token": 3e-06, - "output_cost_per_token": 1.5e-05, + "input_cost_per_token": 0.000003, + "output_cost_per_token": 0.000015, "litellm_provider": "openrouter", 
"mode": "chat", "supports_function_calling": true, @@ -10744,8 +11377,8 @@ "max_tokens": 8192, "max_input_tokens": 200000, "max_output_tokens": 8192, - "input_cost_per_token": 3e-06, - "output_cost_per_token": 1.5e-05, + "input_cost_per_token": 0.000003, + "output_cost_per_token": 0.000015, "litellm_provider": "openrouter", "mode": "chat", "supports_function_calling": true, @@ -10758,8 +11391,8 @@ "max_tokens": 8192, "max_input_tokens": 200000, "max_output_tokens": 8192, - "input_cost_per_token": 3e-06, - "output_cost_per_token": 1.5e-05, + "input_cost_per_token": 0.000003, + "output_cost_per_token": 0.000015, "input_cost_per_image": 0.0048, "litellm_provider": "openrouter", "mode": "chat", @@ -10775,8 +11408,8 @@ "max_tokens": 8192, "max_input_tokens": 200000, "max_output_tokens": 8192, - "input_cost_per_token": 3e-06, - "output_cost_per_token": 1.5e-05, + "input_cost_per_token": 0.000003, + "output_cost_per_token": 0.000015, "input_cost_per_image": 0.0048, "litellm_provider": "openrouter", "mode": "chat", @@ -10788,8 +11421,8 @@ }, "openrouter/anthropic/claude-3-sonnet": { "max_tokens": 200000, - "input_cost_per_token": 3e-06, - "output_cost_per_token": 1.5e-05, + "input_cost_per_token": 0.000003, + "output_cost_per_token": 0.000015, "input_cost_per_image": 0.0048, "litellm_provider": "openrouter", "mode": "chat", @@ -10802,8 +11435,8 @@ "max_tokens": 8192, "max_input_tokens": 200000, "max_output_tokens": 8192, - "input_cost_per_token": 3e-06, - "output_cost_per_token": 1.5e-05, + "input_cost_per_token": 0.000003, + "output_cost_per_token": 0.000015, "input_cost_per_image": 0.0048, "litellm_provider": "openrouter", "mode": "chat", @@ -10816,40 +11449,40 @@ }, "openrouter/mistralai/mistral-large": { "max_tokens": 32000, - "input_cost_per_token": 8e-06, - "output_cost_per_token": 2.4e-05, + "input_cost_per_token": 0.000008, + "output_cost_per_token": 0.000024, "litellm_provider": "openrouter", "mode": "chat", "supports_tool_choice": true }, 
"openrouter/mistralai/mistral-small-3.1-24b-instruct": { "max_tokens": 32000, - "input_cost_per_token": 1e-07, - "output_cost_per_token": 3e-07, + "input_cost_per_token": 1e-7, + "output_cost_per_token": 3e-7, "litellm_provider": "openrouter", "mode": "chat", "supports_tool_choice": true }, "openrouter/mistralai/mistral-small-3.2-24b-instruct": { "max_tokens": 32000, - "input_cost_per_token": 1e-07, - "output_cost_per_token": 3e-07, + "input_cost_per_token": 1e-7, + "output_cost_per_token": 3e-7, "litellm_provider": "openrouter", "mode": "chat", "supports_tool_choice": true }, "openrouter/cognitivecomputations/dolphin-mixtral-8x7b": { "max_tokens": 32769, - "input_cost_per_token": 5e-07, - "output_cost_per_token": 5e-07, + "input_cost_per_token": 5e-7, + "output_cost_per_token": 5e-7, "litellm_provider": "openrouter", "mode": "chat", "supports_tool_choice": true }, "openrouter/google/gemini-pro-vision": { "max_tokens": 45875, - "input_cost_per_token": 1.25e-07, - "output_cost_per_token": 3.75e-07, + "input_cost_per_token": 1.25e-7, + "output_cost_per_token": 3.75e-7, "input_cost_per_image": 0.0025, "litellm_provider": "openrouter", "mode": "chat", @@ -10859,40 +11492,40 @@ }, "openrouter/fireworks/firellava-13b": { "max_tokens": 4096, - "input_cost_per_token": 2e-07, - "output_cost_per_token": 2e-07, + "input_cost_per_token": 2e-7, + "output_cost_per_token": 2e-7, "litellm_provider": "openrouter", "mode": "chat", "supports_tool_choice": true }, "openrouter/meta-llama/llama-3-8b-instruct:free": { "max_tokens": 8192, - "input_cost_per_token": 0.0, - "output_cost_per_token": 0.0, + "input_cost_per_token": 0, + "output_cost_per_token": 0, "litellm_provider": "openrouter", "mode": "chat", "supports_tool_choice": true }, "openrouter/meta-llama/llama-3-8b-instruct:extended": { "max_tokens": 16384, - "input_cost_per_token": 2.25e-07, - "output_cost_per_token": 2.25e-06, + "input_cost_per_token": 2.25e-7, + "output_cost_per_token": 0.00000225, "litellm_provider": 
"openrouter", "mode": "chat", "supports_tool_choice": true }, "openrouter/meta-llama/llama-3-70b-instruct:nitro": { "max_tokens": 8192, - "input_cost_per_token": 9e-07, - "output_cost_per_token": 9e-07, + "input_cost_per_token": 9e-7, + "output_cost_per_token": 9e-7, "litellm_provider": "openrouter", "mode": "chat", "supports_tool_choice": true }, "openrouter/meta-llama/llama-3-70b-instruct": { "max_tokens": 8192, - "input_cost_per_token": 5.9e-07, - "output_cost_per_token": 7.9e-07, + "input_cost_per_token": 5.9e-7, + "output_cost_per_token": 7.9e-7, "litellm_provider": "openrouter", "mode": "chat", "supports_tool_choice": true @@ -10901,9 +11534,9 @@ "max_tokens": 100000, "max_input_tokens": 200000, "max_output_tokens": 100000, - "input_cost_per_token": 1.5e-05, - "output_cost_per_token": 6e-05, - "cache_read_input_token_cost": 7.5e-06, + "input_cost_per_token": 0.000015, + "output_cost_per_token": 0.00006, + "cache_read_input_token_cost": 0.0000075, "litellm_provider": "openrouter", "mode": "chat", "supports_function_calling": true, @@ -10918,8 +11551,8 @@ "max_tokens": 65536, "max_input_tokens": 128000, "max_output_tokens": 65536, - "input_cost_per_token": 3e-06, - "output_cost_per_token": 1.2e-05, + "input_cost_per_token": 0.000003, + "output_cost_per_token": 0.000012, "litellm_provider": "openrouter", "mode": "chat", "supports_function_calling": true, @@ -10931,8 +11564,8 @@ "max_tokens": 65536, "max_input_tokens": 128000, "max_output_tokens": 65536, - "input_cost_per_token": 3e-06, - "output_cost_per_token": 1.2e-05, + "input_cost_per_token": 0.000003, + "output_cost_per_token": 0.000012, "litellm_provider": "openrouter", "mode": "chat", "supports_function_calling": true, @@ -10944,8 +11577,8 @@ "max_tokens": 32768, "max_input_tokens": 128000, "max_output_tokens": 32768, - "input_cost_per_token": 1.5e-05, - "output_cost_per_token": 6e-05, + "input_cost_per_token": 0.000015, + "output_cost_per_token": 0.00006, "litellm_provider": "openrouter", "mode": "chat", 
"supports_function_calling": true, @@ -10957,8 +11590,8 @@ "max_tokens": 32768, "max_input_tokens": 128000, "max_output_tokens": 32768, - "input_cost_per_token": 1.5e-05, - "output_cost_per_token": 6e-05, + "input_cost_per_token": 0.000015, + "output_cost_per_token": 0.00006, "litellm_provider": "openrouter", "mode": "chat", "supports_function_calling": true, @@ -10970,8 +11603,8 @@ "max_tokens": 65536, "max_input_tokens": 128000, "max_output_tokens": 65536, - "input_cost_per_token": 1.1e-06, - "output_cost_per_token": 4.4e-06, + "input_cost_per_token": 0.0000011, + "output_cost_per_token": 0.0000044, "litellm_provider": "openrouter", "mode": "chat", "supports_function_calling": true, @@ -10984,8 +11617,8 @@ "max_tokens": 65536, "max_input_tokens": 128000, "max_output_tokens": 65536, - "input_cost_per_token": 1.1e-06, - "output_cost_per_token": 4.4e-06, + "input_cost_per_token": 0.0000011, + "output_cost_per_token": 0.0000044, "litellm_provider": "openrouter", "mode": "chat", "supports_function_calling": true, @@ -10998,8 +11631,8 @@ "max_tokens": 4096, "max_input_tokens": 128000, "max_output_tokens": 4096, - "input_cost_per_token": 2.5e-06, - "output_cost_per_token": 1e-05, + "input_cost_per_token": 0.0000025, + "output_cost_per_token": 0.00001, "litellm_provider": "openrouter", "mode": "chat", "supports_function_calling": true, @@ -11011,8 +11644,8 @@ "max_tokens": 4096, "max_input_tokens": 128000, "max_output_tokens": 4096, - "input_cost_per_token": 5e-06, - "output_cost_per_token": 1.5e-05, + "input_cost_per_token": 0.000005, + "output_cost_per_token": 0.000015, "litellm_provider": "openrouter", "mode": "chat", "supports_function_calling": true, @@ -11022,8 +11655,8 @@ }, "openrouter/openai/gpt-4-vision-preview": { "max_tokens": 130000, - "input_cost_per_token": 1e-05, - "output_cost_per_token": 3e-05, + "input_cost_per_token": 0.00001, + "output_cost_per_token": 0.00003, "input_cost_per_image": 0.01445, "litellm_provider": "openrouter", "mode": "chat", @@ 
-11033,33 +11666,63 @@ }, "openrouter/openai/gpt-3.5-turbo": { "max_tokens": 4095, - "input_cost_per_token": 1.5e-06, - "output_cost_per_token": 2e-06, + "input_cost_per_token": 0.0000015, + "output_cost_per_token": 0.000002, "litellm_provider": "openrouter", "mode": "chat", "supports_tool_choice": true }, "openrouter/openai/gpt-3.5-turbo-16k": { "max_tokens": 16383, - "input_cost_per_token": 3e-06, - "output_cost_per_token": 4e-06, + "input_cost_per_token": 0.000003, + "output_cost_per_token": 0.000004, "litellm_provider": "openrouter", "mode": "chat", "supports_tool_choice": true }, "openrouter/openai/gpt-4": { "max_tokens": 8192, - "input_cost_per_token": 3e-05, - "output_cost_per_token": 6e-05, + "input_cost_per_token": 0.00003, + "output_cost_per_token": 0.00006, "litellm_provider": "openrouter", "mode": "chat", "supports_tool_choice": true }, + "openrouter/openai/gpt-oss-20b": { + "max_tokens": 32768, + "max_input_tokens": 131072, + "max_output_tokens": 32768, + "input_cost_per_token": 1.8e-7, + "output_cost_per_token": 8e-7, + "litellm_provider": "openrouter", + "mode": "chat", + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_response_schema": true, + "supports_reasoning": true, + "supports_tool_choice": true, + "source": "https://openrouter.ai/openai/gpt-oss-20b" + }, + "openrouter/openai/gpt-oss-120b": { + "max_tokens": 32768, + "max_input_tokens": 131072, + "max_output_tokens": 32768, + "input_cost_per_token": 1.8e-7, + "output_cost_per_token": 8e-7, + "litellm_provider": "openrouter", + "mode": "chat", + "supports_function_calling": true, + "supports_parallel_function_calling": true, + "supports_response_schema": true, + "supports_reasoning": true, + "supports_tool_choice": true, + "source": "https://openrouter.ai/openai/gpt-oss-120b" + }, "openrouter/anthropic/claude-instant-v1": { "max_tokens": 100000, "max_output_tokens": 8191, - "input_cost_per_token": 1.63e-06, - "output_cost_per_token": 5.51e-06, + 
"input_cost_per_token": 0.00000163, + "output_cost_per_token": 0.00000551, "litellm_provider": "openrouter", "mode": "chat", "supports_tool_choice": true @@ -11067,8 +11730,8 @@ "openrouter/anthropic/claude-2": { "max_tokens": 100000, "max_output_tokens": 8191, - "input_cost_per_token": 1.102e-05, - "output_cost_per_token": 3.268e-05, + "input_cost_per_token": 0.00001102, + "output_cost_per_token": 0.00003268, "litellm_provider": "openrouter", "mode": "chat", "supports_tool_choice": true @@ -11077,8 +11740,8 @@ "max_tokens": 4096, "max_input_tokens": 200000, "max_output_tokens": 4096, - "input_cost_per_token": 1.5e-05, - "output_cost_per_token": 7.5e-05, + "input_cost_per_token": 0.000015, + "output_cost_per_token": 0.000075, "litellm_provider": "openrouter", "mode": "chat", "supports_function_calling": true, @@ -11088,104 +11751,104 @@ }, "openrouter/google/palm-2-chat-bison": { "max_tokens": 25804, - "input_cost_per_token": 5e-07, - "output_cost_per_token": 5e-07, + "input_cost_per_token": 5e-7, + "output_cost_per_token": 5e-7, "litellm_provider": "openrouter", "mode": "chat", "supports_tool_choice": true }, "openrouter/google/palm-2-codechat-bison": { "max_tokens": 20070, - "input_cost_per_token": 5e-07, - "output_cost_per_token": 5e-07, + "input_cost_per_token": 5e-7, + "output_cost_per_token": 5e-7, "litellm_provider": "openrouter", "mode": "chat", "supports_tool_choice": true }, "openrouter/meta-llama/llama-2-13b-chat": { "max_tokens": 4096, - "input_cost_per_token": 2e-07, - "output_cost_per_token": 2e-07, + "input_cost_per_token": 2e-7, + "output_cost_per_token": 2e-7, "litellm_provider": "openrouter", "mode": "chat", "supports_tool_choice": true }, "openrouter/meta-llama/llama-2-70b-chat": { "max_tokens": 4096, - "input_cost_per_token": 1.5e-06, - "output_cost_per_token": 1.5e-06, + "input_cost_per_token": 0.0000015, + "output_cost_per_token": 0.0000015, "litellm_provider": "openrouter", "mode": "chat", "supports_tool_choice": true }, 
"openrouter/meta-llama/codellama-34b-instruct": { "max_tokens": 8192, - "input_cost_per_token": 5e-07, - "output_cost_per_token": 5e-07, + "input_cost_per_token": 5e-7, + "output_cost_per_token": 5e-7, "litellm_provider": "openrouter", "mode": "chat", "supports_tool_choice": true }, "openrouter/nousresearch/nous-hermes-llama2-13b": { "max_tokens": 4096, - "input_cost_per_token": 2e-07, - "output_cost_per_token": 2e-07, + "input_cost_per_token": 2e-7, + "output_cost_per_token": 2e-7, "litellm_provider": "openrouter", "mode": "chat", "supports_tool_choice": true }, "openrouter/mancer/weaver": { "max_tokens": 8000, - "input_cost_per_token": 5.625e-06, - "output_cost_per_token": 5.625e-06, + "input_cost_per_token": 0.000005625, + "output_cost_per_token": 0.000005625, "litellm_provider": "openrouter", "mode": "chat", "supports_tool_choice": true }, "openrouter/gryphe/mythomax-l2-13b": { "max_tokens": 8192, - "input_cost_per_token": 1.875e-06, - "output_cost_per_token": 1.875e-06, + "input_cost_per_token": 0.000001875, + "output_cost_per_token": 0.000001875, "litellm_provider": "openrouter", "mode": "chat", "supports_tool_choice": true }, "openrouter/jondurbin/airoboros-l2-70b-2.1": { "max_tokens": 4096, - "input_cost_per_token": 1.3875e-05, - "output_cost_per_token": 1.3875e-05, + "input_cost_per_token": 0.000013875, + "output_cost_per_token": 0.000013875, "litellm_provider": "openrouter", "mode": "chat", "supports_tool_choice": true }, "openrouter/undi95/remm-slerp-l2-13b": { "max_tokens": 6144, - "input_cost_per_token": 1.875e-06, - "output_cost_per_token": 1.875e-06, + "input_cost_per_token": 0.000001875, + "output_cost_per_token": 0.000001875, "litellm_provider": "openrouter", "mode": "chat", "supports_tool_choice": true }, "openrouter/pygmalionai/mythalion-13b": { "max_tokens": 4096, - "input_cost_per_token": 1.875e-06, - "output_cost_per_token": 1.875e-06, + "input_cost_per_token": 0.000001875, + "output_cost_per_token": 0.000001875, "litellm_provider": 
"openrouter", "mode": "chat", "supports_tool_choice": true }, "openrouter/mistralai/mistral-7b-instruct": { "max_tokens": 8192, - "input_cost_per_token": 1.3e-07, - "output_cost_per_token": 1.3e-07, + "input_cost_per_token": 1.3e-7, + "output_cost_per_token": 1.3e-7, "litellm_provider": "openrouter", "mode": "chat", "supports_tool_choice": true }, "openrouter/mistralai/mistral-7b-instruct:free": { "max_tokens": 8192, - "input_cost_per_token": 0.0, - "output_cost_per_token": 0.0, + "input_cost_per_token": 0, + "output_cost_per_token": 0, "litellm_provider": "openrouter", "mode": "chat", "supports_tool_choice": true @@ -11194,8 +11857,8 @@ "max_tokens": 33792, "max_input_tokens": 33792, "max_output_tokens": 33792, - "input_cost_per_token": 1.8e-07, - "output_cost_per_token": 1.8e-07, + "input_cost_per_token": 1.8e-7, + "output_cost_per_token": 1.8e-7, "litellm_provider": "openrouter", "mode": "chat", "supports_tool_choice": true @@ -11204,8 +11867,8 @@ "max_tokens": 8192, "max_input_tokens": 8192, "max_output_tokens": 2048, - "input_cost_per_token": 2.1e-07, - "output_cost_per_token": 6.3e-07, + "input_cost_per_token": 2.1e-7, + "output_cost_per_token": 6.3e-7, "litellm_provider": "openrouter", "mode": "chat", "supports_tool_choice": true @@ -11214,8 +11877,8 @@ "max_tokens": 1000000, "max_input_tokens": 1000000, "max_output_tokens": 1000000, - "input_cost_per_token": 1e-06, - "output_cost_per_token": 5e-06, + "input_cost_per_token": 0.000001, + "output_cost_per_token": 0.000005, "litellm_provider": "openrouter", "source": "https://openrouter.ai/qwen/qwen3-coder", "mode": "chat", @@ -11225,8 +11888,8 @@ "max_tokens": 131072, "max_input_tokens": 131072, "max_output_tokens": 131072, - "input_cost_per_token": 8.5e-07, - "output_cost_per_token": 3.4e-06, + "input_cost_per_token": 8.5e-7, + "output_cost_per_token": 0.0000034, "litellm_provider": "openrouter", "source": "https://openrouter.ai/switchpoint/router", "mode": "chat", @@ -11236,8 +11899,8 @@ "max_tokens": 8192, 
"max_input_tokens": 8192, "max_output_tokens": 8192, - "input_cost_per_token": 1.5e-05, - "output_cost_per_token": 1.5e-05, + "input_cost_per_token": 0.000015, + "output_cost_per_token": 0.000015, "litellm_provider": "ai21", "mode": "completion" }, @@ -11245,8 +11908,8 @@ "max_tokens": 256000, "max_input_tokens": 256000, "max_output_tokens": 256000, - "input_cost_per_token": 2e-07, - "output_cost_per_token": 4e-07, + "input_cost_per_token": 2e-7, + "output_cost_per_token": 4e-7, "litellm_provider": "ai21", "mode": "chat", "supports_tool_choice": true @@ -11255,8 +11918,8 @@ "max_tokens": 256000, "max_input_tokens": 256000, "max_output_tokens": 256000, - "input_cost_per_token": 2e-06, - "output_cost_per_token": 8e-06, + "input_cost_per_token": 0.000002, + "output_cost_per_token": 0.000008, "litellm_provider": "ai21", "mode": "chat", "supports_tool_choice": true @@ -11265,8 +11928,8 @@ "max_tokens": 256000, "max_input_tokens": 256000, "max_output_tokens": 256000, - "input_cost_per_token": 2e-07, - "output_cost_per_token": 4e-07, + "input_cost_per_token": 2e-7, + "output_cost_per_token": 4e-7, "litellm_provider": "ai21", "mode": "chat", "supports_tool_choice": true @@ -11275,8 +11938,8 @@ "max_tokens": 256000, "max_input_tokens": 256000, "max_output_tokens": 256000, - "input_cost_per_token": 2e-07, - "output_cost_per_token": 4e-07, + "input_cost_per_token": 2e-7, + "output_cost_per_token": 4e-7, "litellm_provider": "ai21", "mode": "chat", "supports_tool_choice": true @@ -11285,8 +11948,8 @@ "max_tokens": 256000, "max_input_tokens": 256000, "max_output_tokens": 256000, - "input_cost_per_token": 2e-06, - "output_cost_per_token": 8e-06, + "input_cost_per_token": 0.000002, + "output_cost_per_token": 0.000008, "litellm_provider": "ai21", "mode": "chat", "supports_tool_choice": true @@ -11295,8 +11958,8 @@ "max_tokens": 256000, "max_input_tokens": 256000, "max_output_tokens": 256000, - "input_cost_per_token": 2e-06, - "output_cost_per_token": 8e-06, + 
"input_cost_per_token": 0.000002, + "output_cost_per_token": 0.000008, "litellm_provider": "ai21", "mode": "chat", "supports_tool_choice": true @@ -11305,8 +11968,8 @@ "max_tokens": 256000, "max_input_tokens": 256000, "max_output_tokens": 256000, - "input_cost_per_token": 2e-06, - "output_cost_per_token": 8e-06, + "input_cost_per_token": 0.000002, + "output_cost_per_token": 0.000008, "litellm_provider": "ai21", "mode": "chat", "supports_tool_choice": true @@ -11315,8 +11978,8 @@ "max_tokens": 256000, "max_input_tokens": 256000, "max_output_tokens": 256000, - "input_cost_per_token": 2e-07, - "output_cost_per_token": 4e-07, + "input_cost_per_token": 2e-7, + "output_cost_per_token": 4e-7, "litellm_provider": "ai21", "mode": "chat", "supports_tool_choice": true @@ -11325,8 +11988,8 @@ "max_tokens": 256000, "max_input_tokens": 256000, "max_output_tokens": 256000, - "input_cost_per_token": 2e-07, - "output_cost_per_token": 4e-07, + "input_cost_per_token": 2e-7, + "output_cost_per_token": 4e-7, "litellm_provider": "ai21", "mode": "chat", "supports_tool_choice": true @@ -11335,8 +11998,8 @@ "max_tokens": 8192, "max_input_tokens": 8192, "max_output_tokens": 8192, - "input_cost_per_token": 1e-05, - "output_cost_per_token": 1e-05, + "input_cost_per_token": 0.00001, + "output_cost_per_token": 0.00001, "litellm_provider": "ai21", "mode": "completion" }, @@ -11344,8 +12007,8 @@ "max_tokens": 8192, "max_input_tokens": 8192, "max_output_tokens": 8192, - "input_cost_per_token": 3e-06, - "output_cost_per_token": 3e-06, + "input_cost_per_token": 0.000003, + "output_cost_per_token": 0.000003, "litellm_provider": "ai21", "mode": "completion" }, @@ -11353,8 +12016,8 @@ "max_tokens": 16384, "max_input_tokens": 16384, "max_output_tokens": 16384, - "input_cost_per_token": 5e-07, - "output_cost_per_token": 5e-07, + "input_cost_per_token": 5e-7, + "output_cost_per_token": 5e-7, "litellm_provider": "nlp_cloud", "mode": "completion" }, @@ -11362,36 +12025,36 @@ "max_tokens": 16384, 
"max_input_tokens": 16384, "max_output_tokens": 16384, - "input_cost_per_token": 5e-07, - "output_cost_per_token": 5e-07, + "input_cost_per_token": 5e-7, + "output_cost_per_token": 5e-7, "litellm_provider": "nlp_cloud", "mode": "chat" }, "luminous-base": { "max_tokens": 2048, - "input_cost_per_token": 3e-05, - "output_cost_per_token": 3.3e-05, + "input_cost_per_token": 0.00003, + "output_cost_per_token": 0.000033, "litellm_provider": "aleph_alpha", "mode": "completion" }, "luminous-base-control": { "max_tokens": 2048, - "input_cost_per_token": 3.75e-05, - "output_cost_per_token": 4.125e-05, + "input_cost_per_token": 0.0000375, + "output_cost_per_token": 0.00004125, "litellm_provider": "aleph_alpha", "mode": "chat" }, "luminous-extended": { "max_tokens": 2048, - "input_cost_per_token": 4.5e-05, - "output_cost_per_token": 4.95e-05, + "input_cost_per_token": 0.000045, + "output_cost_per_token": 0.0000495, "litellm_provider": "aleph_alpha", "mode": "completion" }, "luminous-extended-control": { "max_tokens": 2048, - "input_cost_per_token": 5.625e-05, - "output_cost_per_token": 6.1875e-05, + "input_cost_per_token": 0.00005625, + "output_cost_per_token": 0.000061875, "litellm_provider": "aleph_alpha", "mode": "chat" }, @@ -11413,8 +12076,8 @@ "max_tokens": 8191, "max_input_tokens": 8191, "max_output_tokens": 8191, - "input_cost_per_token": 1.25e-05, - "output_cost_per_token": 1.25e-05, + "input_cost_per_token": 0.0000125, + "output_cost_per_token": 0.0000125, "litellm_provider": "bedrock", "mode": "chat" }, @@ -11422,8 +12085,8 @@ "max_tokens": 8191, "max_input_tokens": 8191, "max_output_tokens": 8191, - "input_cost_per_token": 1.88e-05, - "output_cost_per_token": 1.88e-05, + "input_cost_per_token": 0.0000188, + "output_cost_per_token": 0.0000188, "litellm_provider": "bedrock", "mode": "chat" }, @@ -11431,8 +12094,8 @@ "max_tokens": 4096, "max_input_tokens": 70000, "max_output_tokens": 4096, - "input_cost_per_token": 5e-07, - "output_cost_per_token": 7e-07, + 
"input_cost_per_token": 5e-7, + "output_cost_per_token": 7e-7, "litellm_provider": "bedrock", "mode": "chat", "supports_system_messages": true @@ -11441,8 +12104,8 @@ "max_tokens": 256000, "max_input_tokens": 256000, "max_output_tokens": 256000, - "input_cost_per_token": 2e-06, - "output_cost_per_token": 8e-06, + "input_cost_per_token": 0.000002, + "output_cost_per_token": 0.000008, "litellm_provider": "bedrock", "mode": "chat" }, @@ -11450,8 +12113,8 @@ "max_tokens": 256000, "max_input_tokens": 256000, "max_output_tokens": 256000, - "input_cost_per_token": 2e-07, - "output_cost_per_token": 4e-07, + "input_cost_per_token": 2e-7, + "output_cost_per_token": 4e-7, "litellm_provider": "bedrock", "mode": "chat" }, @@ -11462,9 +12125,9 @@ "max_query_tokens": 32000, "max_document_chunks_per_query": 100, "max_tokens_per_document_chunk": 512, - "input_cost_per_token": 0.0, + "input_cost_per_token": 0, "input_cost_per_query": 0.001, - "output_cost_per_token": 0.0, + "output_cost_per_token": 0, "litellm_provider": "bedrock", "mode": "rerank" }, @@ -11472,8 +12135,8 @@ "max_tokens": 4000, "max_input_tokens": 42000, "max_output_tokens": 4000, - "input_cost_per_token": 3e-07, - "output_cost_per_token": 4e-07, + "input_cost_per_token": 3e-7, + "output_cost_per_token": 4e-7, "litellm_provider": "bedrock", "mode": "chat" }, @@ -11481,8 +12144,8 @@ "max_tokens": 8000, "max_input_tokens": 42000, "max_output_tokens": 8000, - "input_cost_per_token": 1.3e-06, - "output_cost_per_token": 1.7e-06, + "input_cost_per_token": 0.0000013, + "output_cost_per_token": 0.0000017, "litellm_provider": "bedrock", "mode": "chat" }, @@ -11490,8 +12153,8 @@ "max_tokens": 32000, "max_input_tokens": 42000, "max_output_tokens": 32000, - "input_cost_per_token": 5e-07, - "output_cost_per_token": 1.5e-06, + "input_cost_per_token": 5e-7, + "output_cost_per_token": 0.0000015, "litellm_provider": "bedrock", "mode": "chat" }, @@ -11499,8 +12162,8 @@ "max_tokens": 8192, "max_input_tokens": 8192, 
"output_vector_size": 1536, - "input_cost_per_token": 1e-07, - "output_cost_per_token": 0.0, + "input_cost_per_token": 1e-7, + "output_cost_per_token": 0, "litellm_provider": "bedrock", "mode": "embedding" }, @@ -11508,8 +12171,8 @@ "max_tokens": 8192, "max_input_tokens": 8192, "output_vector_size": 1024, - "input_cost_per_token": 2e-07, - "output_cost_per_token": 0.0, + "input_cost_per_token": 2e-7, + "output_cost_per_token": 0, "litellm_provider": "bedrock", "mode": "embedding" }, @@ -11517,9 +12180,9 @@ "max_tokens": 128, "max_input_tokens": 128, "output_vector_size": 1024, - "input_cost_per_token": 8e-07, - "input_cost_per_image": 6e-05, - "output_cost_per_token": 0.0, + "input_cost_per_token": 8e-7, + "input_cost_per_image": 0.00006, + "output_cost_per_token": 0, "litellm_provider": "bedrock", "supports_image_input": true, "supports_embedding_image_input": true, @@ -11533,8 +12196,8 @@ "max_tokens": 8191, "max_input_tokens": 32000, "max_output_tokens": 8191, - "input_cost_per_token": 1.5e-07, - "output_cost_per_token": 2e-07, + "input_cost_per_token": 1.5e-7, + "output_cost_per_token": 2e-7, "litellm_provider": "bedrock", "mode": "chat", "supports_tool_choice": true @@ -11543,8 +12206,8 @@ "max_tokens": 8191, "max_input_tokens": 32000, "max_output_tokens": 8191, - "input_cost_per_token": 4.5e-07, - "output_cost_per_token": 7e-07, + "input_cost_per_token": 4.5e-7, + "output_cost_per_token": 7e-7, "litellm_provider": "bedrock", "mode": "chat", "supports_tool_choice": true @@ -11553,8 +12216,8 @@ "max_tokens": 8191, "max_input_tokens": 32000, "max_output_tokens": 8191, - "input_cost_per_token": 8e-06, - "output_cost_per_token": 2.4e-05, + "input_cost_per_token": 0.000008, + "output_cost_per_token": 0.000024, "litellm_provider": "bedrock", "mode": "chat", "supports_function_calling": true @@ -11563,8 +12226,8 @@ "max_tokens": 8191, "max_input_tokens": 128000, "max_output_tokens": 8191, - "input_cost_per_token": 3e-06, - "output_cost_per_token": 9e-06, + 
"input_cost_per_token": 0.000003, + "output_cost_per_token": 0.000009, "litellm_provider": "bedrock", "mode": "chat", "supports_function_calling": true, @@ -11574,8 +12237,8 @@ "max_tokens": 8191, "max_input_tokens": 32000, "max_output_tokens": 8191, - "input_cost_per_token": 1e-06, - "output_cost_per_token": 3e-06, + "input_cost_per_token": 0.000001, + "output_cost_per_token": 0.000003, "litellm_provider": "bedrock", "mode": "chat", "supports_function_calling": true @@ -11584,8 +12247,8 @@ "max_tokens": 128000, "max_input_tokens": 128000, "max_output_tokens": 4096, - "input_cost_per_token": 2e-06, - "output_cost_per_token": 6e-06, + "input_cost_per_token": 0.000002, + "output_cost_per_token": 0.000006, "litellm_provider": "bedrock_converse", "mode": "chat", "supports_function_calling": true, @@ -11595,8 +12258,8 @@ "max_tokens": 128000, "max_input_tokens": 128000, "max_output_tokens": 4096, - "input_cost_per_token": 2e-06, - "output_cost_per_token": 6e-06, + "input_cost_per_token": 0.000002, + "output_cost_per_token": 0.000006, "litellm_provider": "bedrock_converse", "mode": "chat", "supports_function_calling": true, @@ -11606,8 +12269,8 @@ "max_tokens": 8191, "max_input_tokens": 32000, "max_output_tokens": 8191, - "input_cost_per_token": 4.5e-07, - "output_cost_per_token": 7e-07, + "input_cost_per_token": 4.5e-7, + "output_cost_per_token": 7e-7, "litellm_provider": "bedrock", "mode": "chat", "supports_tool_choice": true @@ -11616,8 +12279,8 @@ "max_tokens": 8191, "max_input_tokens": 32000, "max_output_tokens": 8191, - "input_cost_per_token": 4.5e-07, - "output_cost_per_token": 7e-07, + "input_cost_per_token": 4.5e-7, + "output_cost_per_token": 7e-7, "litellm_provider": "bedrock", "mode": "chat", "supports_tool_choice": true @@ -11626,8 +12289,8 @@ "max_tokens": 8191, "max_input_tokens": 32000, "max_output_tokens": 8191, - "input_cost_per_token": 5.9e-07, - "output_cost_per_token": 9.1e-07, + "input_cost_per_token": 5.9e-7, + "output_cost_per_token": 9.1e-7, 
"litellm_provider": "bedrock", "mode": "chat", "supports_tool_choice": true @@ -11636,8 +12299,8 @@ "max_tokens": 8191, "max_input_tokens": 32000, "max_output_tokens": 8191, - "input_cost_per_token": 1.5e-07, - "output_cost_per_token": 2e-07, + "input_cost_per_token": 1.5e-7, + "output_cost_per_token": 2e-7, "litellm_provider": "bedrock", "mode": "chat", "supports_tool_choice": true @@ -11646,8 +12309,8 @@ "max_tokens": 8191, "max_input_tokens": 32000, "max_output_tokens": 8191, - "input_cost_per_token": 1.5e-07, - "output_cost_per_token": 2e-07, + "input_cost_per_token": 1.5e-7, + "output_cost_per_token": 2e-7, "litellm_provider": "bedrock", "mode": "chat", "supports_tool_choice": true @@ -11656,8 +12319,8 @@ "max_tokens": 8191, "max_input_tokens": 32000, "max_output_tokens": 8191, - "input_cost_per_token": 2e-07, - "output_cost_per_token": 2.6e-07, + "input_cost_per_token": 2e-7, + "output_cost_per_token": 2.6e-7, "litellm_provider": "bedrock", "mode": "chat", "supports_tool_choice": true @@ -11666,8 +12329,8 @@ "max_tokens": 8191, "max_input_tokens": 32000, "max_output_tokens": 8191, - "input_cost_per_token": 8e-06, - "output_cost_per_token": 2.4e-05, + "input_cost_per_token": 0.000008, + "output_cost_per_token": 0.000024, "litellm_provider": "bedrock", "mode": "chat", "supports_function_calling": true @@ -11676,8 +12339,8 @@ "max_tokens": 8191, "max_input_tokens": 32000, "max_output_tokens": 8191, - "input_cost_per_token": 8e-06, - "output_cost_per_token": 2.4e-05, + "input_cost_per_token": 0.000008, + "output_cost_per_token": 0.000024, "litellm_provider": "bedrock", "mode": "chat", "supports_function_calling": true @@ -11686,8 +12349,8 @@ "max_tokens": 8191, "max_input_tokens": 32000, "max_output_tokens": 8191, - "input_cost_per_token": 1.04e-05, - "output_cost_per_token": 3.12e-05, + "input_cost_per_token": 0.0000104, + "output_cost_per_token": 0.0000312, "litellm_provider": "bedrock", "mode": "chat", "supports_function_calling": true @@ -11696,8 +12359,8 @@ 
"max_tokens": 10000, "max_input_tokens": 128000, "max_output_tokens": 10000, - "input_cost_per_token": 3.5e-08, - "output_cost_per_token": 1.4e-07, + "input_cost_per_token": 3.5e-8, + "output_cost_per_token": 1.4e-7, "litellm_provider": "bedrock_converse", "mode": "chat", "supports_function_calling": true, @@ -11708,8 +12371,8 @@ "max_tokens": 10000, "max_input_tokens": 128000, "max_output_tokens": 10000, - "input_cost_per_token": 3.5e-08, - "output_cost_per_token": 1.4e-07, + "input_cost_per_token": 3.5e-8, + "output_cost_per_token": 1.4e-7, "litellm_provider": "bedrock_converse", "mode": "chat", "supports_function_calling": true, @@ -11720,8 +12383,8 @@ "max_tokens": 10000, "max_input_tokens": 128000, "max_output_tokens": 10000, - "input_cost_per_token": 4.6e-08, - "output_cost_per_token": 1.84e-07, + "input_cost_per_token": 4.6e-8, + "output_cost_per_token": 1.84e-7, "litellm_provider": "bedrock_converse", "mode": "chat", "supports_function_calling": true, @@ -11732,8 +12395,8 @@ "max_tokens": 10000, "max_input_tokens": 300000, "max_output_tokens": 10000, - "input_cost_per_token": 6e-08, - "output_cost_per_token": 2.4e-07, + "input_cost_per_token": 6e-8, + "output_cost_per_token": 2.4e-7, "litellm_provider": "bedrock_converse", "mode": "chat", "supports_function_calling": true, @@ -11746,8 +12409,8 @@ "max_tokens": 10000, "max_input_tokens": 300000, "max_output_tokens": 10000, - "input_cost_per_token": 6e-08, - "output_cost_per_token": 2.4e-07, + "input_cost_per_token": 6e-8, + "output_cost_per_token": 2.4e-7, "litellm_provider": "bedrock_converse", "mode": "chat", "supports_function_calling": true, @@ -11760,8 +12423,8 @@ "max_tokens": 10000, "max_input_tokens": 300000, "max_output_tokens": 10000, - "input_cost_per_token": 7.8e-08, - "output_cost_per_token": 3.12e-07, + "input_cost_per_token": 7.8e-8, + "output_cost_per_token": 3.12e-7, "litellm_provider": "bedrock_converse", "mode": "chat", "supports_function_calling": true, @@ -11774,8 +12437,8 @@ 
"max_tokens": 10000, "max_input_tokens": 300000, "max_output_tokens": 10000, - "input_cost_per_token": 8e-07, - "output_cost_per_token": 3.2e-06, + "input_cost_per_token": 8e-7, + "output_cost_per_token": 0.0000032, "litellm_provider": "bedrock_converse", "mode": "chat", "supports_function_calling": true, @@ -11788,8 +12451,8 @@ "max_tokens": 10000, "max_input_tokens": 300000, "max_output_tokens": 10000, - "input_cost_per_token": 8e-07, - "output_cost_per_token": 3.2e-06, + "input_cost_per_token": 8e-7, + "output_cost_per_token": 0.0000032, "litellm_provider": "bedrock_converse", "mode": "chat", "supports_function_calling": true, @@ -11808,8 +12471,8 @@ "max_tokens": 10000, "max_input_tokens": 300000, "max_output_tokens": 10000, - "input_cost_per_token": 1.05e-06, - "output_cost_per_token": 4.2e-06, + "input_cost_per_token": 0.00000105, + "output_cost_per_token": 0.0000042, "litellm_provider": "bedrock_converse", "mode": "chat", "supports_function_calling": true, @@ -11823,8 +12486,8 @@ "max_tokens": 10000, "max_input_tokens": 128000, "max_output_tokens": 10000, - "input_cost_per_token": 3.7e-08, - "output_cost_per_token": 1.48e-07, + "input_cost_per_token": 3.7e-8, + "output_cost_per_token": 1.48e-7, "litellm_provider": "bedrock_converse", "mode": "chat", "supports_function_calling": true, @@ -11835,8 +12498,8 @@ "max_tokens": 10000, "max_input_tokens": 300000, "max_output_tokens": 10000, - "input_cost_per_token": 6.3e-08, - "output_cost_per_token": 2.52e-07, + "input_cost_per_token": 6.3e-8, + "output_cost_per_token": 2.52e-7, "litellm_provider": "bedrock_converse", "mode": "chat", "supports_function_calling": true, @@ -11849,8 +12512,8 @@ "max_tokens": 10000, "max_input_tokens": 300000, "max_output_tokens": 10000, - "input_cost_per_token": 8.4e-07, - "output_cost_per_token": 3.36e-06, + "input_cost_per_token": 8.4e-7, + "output_cost_per_token": 0.00000336, "litellm_provider": "bedrock_converse", "mode": "chat", "supports_function_calling": true, @@ -11863,8 
+12526,8 @@ "max_tokens": 10000, "max_input_tokens": 1000000, "max_output_tokens": 10000, - "input_cost_per_token": 2.5e-06, - "output_cost_per_token": 1.25e-05, + "input_cost_per_token": 0.0000025, + "output_cost_per_token": 0.0000125, "litellm_provider": "bedrock_converse", "mode": "chat", "supports_function_calling": true, @@ -11877,8 +12540,8 @@ "max_tokens": 4096, "max_input_tokens": 200000, "max_output_tokens": 4096, - "input_cost_per_token": 3e-06, - "output_cost_per_token": 1.5e-05, + "input_cost_per_token": 0.000003, + "output_cost_per_token": 0.000015, "litellm_provider": "bedrock", "mode": "chat", "supports_function_calling": true, @@ -11891,8 +12554,8 @@ "max_tokens": 4096, "max_input_tokens": 200000, "max_output_tokens": 4096, - "input_cost_per_token": 3e-06, - "output_cost_per_token": 1.5e-05, + "input_cost_per_token": 0.000003, + "output_cost_per_token": 0.000015, "litellm_provider": "bedrock", "mode": "chat", "supports_function_calling": true, @@ -11907,8 +12570,8 @@ "max_tokens": 4096, "max_input_tokens": 200000, "max_output_tokens": 4096, - "input_cost_per_token": 3e-06, - "output_cost_per_token": 1.5e-05, + "input_cost_per_token": 0.000003, + "output_cost_per_token": 0.000015, "litellm_provider": "bedrock", "mode": "chat", "supports_function_calling": true, @@ -11917,19 +12580,47 @@ "supports_pdf_input": true, "supports_tool_choice": true }, + "openai.gpt-oss-20b-1:0": { + "max_tokens": 128000, + "max_input_tokens": 128000, + "max_output_tokens": 128000, + "input_cost_per_token": 7e-8, + "output_cost_per_token": 3e-7, + "litellm_provider": "bedrock_converse", + "mode": "chat", + "supports_function_calling": true, + "supports_vision": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_reasoning": true + }, + "openai.gpt-oss-120b-1:0": { + "max_tokens": 128000, + "max_input_tokens": 128000, + "max_output_tokens": 128000, + "input_cost_per_token": 1.5e-7, + "output_cost_per_token": 6e-7, + "litellm_provider": 
"bedrock_converse", + "mode": "chat", + "supports_function_calling": true, + "supports_vision": true, + "supports_response_schema": true, + "supports_tool_choice": true, + "supports_reasoning": true + }, "anthropic.claude-opus-4-1-20250805-v1:0": { "max_tokens": 32000, "max_input_tokens": 200000, "max_output_tokens": 32000, - "input_cost_per_token": 1.5e-05, - "output_cost_per_token": 7.5e-05, + "input_cost_per_token": 0.000015, + "output_cost_per_token": 0.000075, "search_context_cost_per_query": { "search_context_size_low": 0.01, "search_context_size_medium": 0.01, "search_context_size_high": 0.01 }, - "cache_creation_input_token_cost": 1.875e-05, - "cache_read_input_token_cost": 1.5e-06, + "cache_creation_input_token_cost": 0.00001875, + "cache_read_input_token_cost": 0.0000015, "litellm_provider": "bedrock_converse", "mode": "chat", "supports_function_calling": true, @@ -11947,15 +12638,15 @@ "max_tokens": 32000, "max_input_tokens": 200000, "max_output_tokens": 32000, - "input_cost_per_token": 1.5e-05, - "output_cost_per_token": 7.5e-05, + "input_cost_per_token": 0.000015, + "output_cost_per_token": 0.000075, "search_context_cost_per_query": { "search_context_size_low": 0.01, "search_context_size_medium": 0.01, "search_context_size_high": 0.01 }, - "cache_creation_input_token_cost": 1.875e-05, - "cache_read_input_token_cost": 1.5e-06, + "cache_creation_input_token_cost": 0.00001875, + "cache_read_input_token_cost": 0.0000015, "litellm_provider": "bedrock_converse", "mode": "chat", "supports_function_calling": true, @@ -11973,15 +12664,15 @@ "max_tokens": 64000, "max_input_tokens": 200000, "max_output_tokens": 64000, - "input_cost_per_token": 3e-06, - "output_cost_per_token": 1.5e-05, + "input_cost_per_token": 0.000003, + "output_cost_per_token": 0.000015, "search_context_cost_per_query": { "search_context_size_low": 0.01, "search_context_size_medium": 0.01, "search_context_size_high": 0.01 }, - "cache_creation_input_token_cost": 3.75e-06, - 
"cache_read_input_token_cost": 3e-07, + "cache_creation_input_token_cost": 0.00000375, + "cache_read_input_token_cost": 3e-7, "litellm_provider": "bedrock_converse", "mode": "chat", "supports_function_calling": true, @@ -12000,10 +12691,10 @@ "max_tokens": 8192, "max_input_tokens": 200000, "max_output_tokens": 8192, - "input_cost_per_token": 3e-06, - "output_cost_per_token": 1.5e-05, - "cache_creation_input_token_cost": 3.75e-06, - "cache_read_input_token_cost": 3e-07, + "input_cost_per_token": 0.000003, + "output_cost_per_token": 0.000015, + "cache_creation_input_token_cost": 0.00000375, + "cache_read_input_token_cost": 3e-7, "litellm_provider": "bedrock_converse", "mode": "chat", "supports_function_calling": true, @@ -12020,10 +12711,10 @@ "max_tokens": 8192, "max_input_tokens": 200000, "max_output_tokens": 8192, - "input_cost_per_token": 3e-06, - "output_cost_per_token": 1.5e-05, - "cache_creation_input_token_cost": 3.75e-06, - "cache_read_input_token_cost": 3e-07, + "input_cost_per_token": 0.000003, + "output_cost_per_token": 0.000015, + "cache_creation_input_token_cost": 0.00000375, + "cache_read_input_token_cost": 3e-7, "litellm_provider": "bedrock", "mode": "chat", "supports_function_calling": true, @@ -12038,8 +12729,8 @@ "max_tokens": 4096, "max_input_tokens": 200000, "max_output_tokens": 4096, - "input_cost_per_token": 2.5e-07, - "output_cost_per_token": 1.25e-06, + "input_cost_per_token": 2.5e-7, + "output_cost_per_token": 0.00000125, "litellm_provider": "bedrock", "mode": "chat", "supports_function_calling": true, @@ -12052,10 +12743,10 @@ "max_tokens": 8192, "max_input_tokens": 200000, "max_output_tokens": 8192, - "input_cost_per_token": 8e-07, - "output_cost_per_token": 4e-06, - "cache_creation_input_token_cost": 1e-06, - "cache_read_input_token_cost": 8e-08, + "input_cost_per_token": 8e-7, + "output_cost_per_token": 0.000004, + "cache_creation_input_token_cost": 0.000001, + "cache_read_input_token_cost": 8e-8, "litellm_provider": "bedrock", "mode": 
"chat", "supports_assistant_prefill": true, @@ -12069,8 +12760,8 @@ "max_tokens": 4096, "max_input_tokens": 200000, "max_output_tokens": 4096, - "input_cost_per_token": 1.5e-05, - "output_cost_per_token": 7.5e-05, + "input_cost_per_token": 0.000015, + "output_cost_per_token": 0.000075, "litellm_provider": "bedrock", "mode": "chat", "supports_function_calling": true, @@ -12082,8 +12773,8 @@ "max_tokens": 4096, "max_input_tokens": 200000, "max_output_tokens": 4096, - "input_cost_per_token": 3e-06, - "output_cost_per_token": 1.5e-05, + "input_cost_per_token": 0.000003, + "output_cost_per_token": 0.000015, "litellm_provider": "bedrock", "mode": "chat", "supports_function_calling": true, @@ -12096,8 +12787,8 @@ "max_tokens": 4096, "max_input_tokens": 200000, "max_output_tokens": 4096, - "input_cost_per_token": 3e-06, - "output_cost_per_token": 1.5e-05, + "input_cost_per_token": 0.000003, + "output_cost_per_token": 0.000015, "litellm_provider": "bedrock", "mode": "chat", "supports_function_calling": true, @@ -12111,10 +12802,10 @@ "max_tokens": 8192, "max_input_tokens": 200000, "max_output_tokens": 8192, - "input_cost_per_token": 3e-06, - "output_cost_per_token": 1.5e-05, - "cache_creation_input_token_cost": 3.75e-06, - "cache_read_input_token_cost": 3e-07, + "input_cost_per_token": 0.000003, + "output_cost_per_token": 0.000015, + "cache_creation_input_token_cost": 0.00000375, + "cache_read_input_token_cost": 3e-7, "litellm_provider": "bedrock", "mode": "chat", "supports_function_calling": true, @@ -12130,10 +12821,10 @@ "max_tokens": 8192, "max_input_tokens": 200000, "max_output_tokens": 8192, - "input_cost_per_token": 3e-06, - "output_cost_per_token": 1.5e-05, - "cache_creation_input_token_cost": 3.75e-06, - "cache_read_input_token_cost": 3e-07, + "input_cost_per_token": 0.000003, + "output_cost_per_token": 0.000015, + "cache_creation_input_token_cost": 0.00000375, + "cache_read_input_token_cost": 3e-7, "litellm_provider": "bedrock_converse", "mode": "chat", 
"supports_function_calling": true, @@ -12149,15 +12840,15 @@ "max_tokens": 32000, "max_input_tokens": 200000, "max_output_tokens": 32000, - "input_cost_per_token": 1.5e-05, - "output_cost_per_token": 7.5e-05, + "input_cost_per_token": 0.000015, + "output_cost_per_token": 0.000075, "search_context_cost_per_query": { "search_context_size_low": 0.01, "search_context_size_medium": 0.01, "search_context_size_high": 0.01 }, - "cache_creation_input_token_cost": 1.875e-05, - "cache_read_input_token_cost": 1.5e-06, + "cache_creation_input_token_cost": 0.00001875, + "cache_read_input_token_cost": 0.0000015, "litellm_provider": "bedrock_converse", "mode": "chat", "supports_function_calling": true, @@ -12175,15 +12866,15 @@ "max_tokens": 32000, "max_input_tokens": 200000, "max_output_tokens": 32000, - "input_cost_per_token": 1.5e-05, - "output_cost_per_token": 7.5e-05, + "input_cost_per_token": 0.000015, + "output_cost_per_token": 0.000075, "search_context_cost_per_query": { "search_context_size_low": 0.01, "search_context_size_medium": 0.01, "search_context_size_high": 0.01 }, - "cache_creation_input_token_cost": 1.875e-05, - "cache_read_input_token_cost": 1.5e-06, + "cache_creation_input_token_cost": 0.00001875, + "cache_read_input_token_cost": 0.0000015, "litellm_provider": "bedrock_converse", "mode": "chat", "supports_function_calling": true, @@ -12201,15 +12892,15 @@ "max_tokens": 64000, "max_input_tokens": 200000, "max_output_tokens": 64000, - "input_cost_per_token": 3e-06, - "output_cost_per_token": 1.5e-05, + "input_cost_per_token": 0.000003, + "output_cost_per_token": 0.000015, "search_context_cost_per_query": { "search_context_size_low": 0.01, "search_context_size_medium": 0.01, "search_context_size_high": 0.01 }, - "cache_creation_input_token_cost": 3.75e-06, - "cache_read_input_token_cost": 3e-07, + "cache_creation_input_token_cost": 0.00000375, + "cache_read_input_token_cost": 3e-7, "litellm_provider": "bedrock_converse", "mode": "chat", 
"supports_function_calling": true, @@ -12227,8 +12918,8 @@ "max_tokens": 4096, "max_input_tokens": 200000, "max_output_tokens": 4096, - "input_cost_per_token": 2.5e-07, - "output_cost_per_token": 1.25e-06, + "input_cost_per_token": 2.5e-7, + "output_cost_per_token": 0.00000125, "litellm_provider": "bedrock", "mode": "chat", "supports_function_calling": true, @@ -12241,10 +12932,10 @@ "max_tokens": 8192, "max_input_tokens": 200000, "max_output_tokens": 8192, - "input_cost_per_token": 8e-07, - "output_cost_per_token": 4e-06, - "cache_creation_input_token_cost": 1e-06, - "cache_read_input_token_cost": 8e-08, + "input_cost_per_token": 8e-7, + "output_cost_per_token": 0.000004, + "cache_creation_input_token_cost": 0.000001, + "cache_read_input_token_cost": 8e-8, "litellm_provider": "bedrock", "mode": "chat", "supports_assistant_prefill": true, @@ -12258,8 +12949,8 @@ "max_tokens": 4096, "max_input_tokens": 200000, "max_output_tokens": 4096, - "input_cost_per_token": 1.5e-05, - "output_cost_per_token": 7.5e-05, + "input_cost_per_token": 0.000015, + "output_cost_per_token": 0.000075, "litellm_provider": "bedrock", "mode": "chat", "supports_function_calling": true, @@ -12271,8 +12962,8 @@ "max_tokens": 4096, "max_input_tokens": 200000, "max_output_tokens": 4096, - "input_cost_per_token": 3e-06, - "output_cost_per_token": 1.5e-05, + "input_cost_per_token": 0.000003, + "output_cost_per_token": 0.000015, "litellm_provider": "bedrock", "mode": "chat", "supports_function_calling": true, @@ -12285,8 +12976,8 @@ "max_tokens": 4096, "max_input_tokens": 200000, "max_output_tokens": 4096, - "input_cost_per_token": 3e-06, - "output_cost_per_token": 1.5e-05, + "input_cost_per_token": 0.000003, + "output_cost_per_token": 0.000015, "litellm_provider": "bedrock", "mode": "chat", "supports_function_calling": true, @@ -12300,8 +12991,8 @@ "max_tokens": 8192, "max_input_tokens": 200000, "max_output_tokens": 8192, - "input_cost_per_token": 3e-06, - "output_cost_per_token": 1.5e-05, + 
"input_cost_per_token": 0.000003, + "output_cost_per_token": 0.000015, "litellm_provider": "bedrock", "mode": "chat", "supports_function_calling": true, @@ -12317,8 +13008,8 @@ "max_tokens": 8192, "max_input_tokens": 200000, "max_output_tokens": 8192, - "input_cost_per_token": 3e-06, - "output_cost_per_token": 1.5e-05, + "input_cost_per_token": 0.000003, + "output_cost_per_token": 0.000015, "litellm_provider": "bedrock", "mode": "chat", "supports_function_calling": true, @@ -12334,8 +13025,8 @@ "max_tokens": 4096, "max_input_tokens": 200000, "max_output_tokens": 4096, - "input_cost_per_token": 2.5e-07, - "output_cost_per_token": 1.25e-06, + "input_cost_per_token": 2.5e-7, + "output_cost_per_token": 0.00000125, "litellm_provider": "bedrock", "mode": "chat", "supports_function_calling": true, @@ -12348,15 +13039,15 @@ "max_tokens": 32000, "max_input_tokens": 200000, "max_output_tokens": 32000, - "input_cost_per_token": 1.5e-05, - "output_cost_per_token": 7.5e-05, + "input_cost_per_token": 0.000015, + "output_cost_per_token": 0.000075, "search_context_cost_per_query": { "search_context_size_low": 0.01, "search_context_size_medium": 0.01, "search_context_size_high": 0.01 }, - "cache_creation_input_token_cost": 1.875e-05, - "cache_read_input_token_cost": 1.5e-06, + "cache_creation_input_token_cost": 0.00001875, + "cache_read_input_token_cost": 0.0000015, "litellm_provider": "bedrock_converse", "mode": "chat", "supports_function_calling": true, @@ -12374,15 +13065,15 @@ "max_tokens": 32000, "max_input_tokens": 200000, "max_output_tokens": 32000, - "input_cost_per_token": 1.5e-05, - "output_cost_per_token": 7.5e-05, + "input_cost_per_token": 0.000015, + "output_cost_per_token": 0.000075, "search_context_cost_per_query": { "search_context_size_low": 0.01, "search_context_size_medium": 0.01, "search_context_size_high": 0.01 }, - "cache_creation_input_token_cost": 1.875e-05, - "cache_read_input_token_cost": 1.5e-06, + "cache_creation_input_token_cost": 0.00001875, + 
"cache_read_input_token_cost": 0.0000015, "litellm_provider": "bedrock_converse", "mode": "chat", "supports_function_calling": true, @@ -12400,15 +13091,15 @@ "max_tokens": 64000, "max_input_tokens": 200000, "max_output_tokens": 64000, - "input_cost_per_token": 3e-06, - "output_cost_per_token": 1.5e-05, + "input_cost_per_token": 0.000003, + "output_cost_per_token": 0.000015, "search_context_cost_per_query": { "search_context_size_low": 0.01, "search_context_size_medium": 0.01, "search_context_size_high": 0.01 }, - "cache_creation_input_token_cost": 3.75e-06, - "cache_read_input_token_cost": 3e-07, + "cache_creation_input_token_cost": 0.00000375, + "cache_read_input_token_cost": 3e-7, "litellm_provider": "bedrock_converse", "mode": "chat", "supports_function_calling": true, @@ -12426,8 +13117,8 @@ "max_tokens": 4096, "max_input_tokens": 200000, "max_output_tokens": 4096, - "input_cost_per_token": 2.5e-07, - "output_cost_per_token": 1.25e-06, + "input_cost_per_token": 2.5e-7, + "output_cost_per_token": 0.00000125, "litellm_provider": "bedrock", "mode": "chat", "supports_function_calling": true, @@ -12440,8 +13131,8 @@ "max_tokens": 4096, "max_input_tokens": 200000, "max_output_tokens": 4096, - "input_cost_per_token": 3e-06, - "output_cost_per_token": 1.5e-05, + "input_cost_per_token": 0.000003, + "output_cost_per_token": 0.000015, "litellm_provider": "bedrock", "mode": "chat", "supports_function_calling": true, @@ -12454,8 +13145,8 @@ "max_tokens": 4096, "max_input_tokens": 200000, "max_output_tokens": 4096, - "input_cost_per_token": 3e-06, - "output_cost_per_token": 1.5e-05, + "input_cost_per_token": 0.000003, + "output_cost_per_token": 0.000015, "litellm_provider": "bedrock", "mode": "chat", "supports_function_calling": true, @@ -12468,10 +13159,10 @@ "max_tokens": 8192, "max_input_tokens": 200000, "max_output_tokens": 8192, - "input_cost_per_token": 3e-06, - "output_cost_per_token": 1.5e-05, - "cache_creation_input_token_cost": 3.75e-06, - 
"cache_read_input_token_cost": 3e-07, + "input_cost_per_token": 0.000003, + "output_cost_per_token": 0.000015, + "cache_creation_input_token_cost": 0.00000375, + "cache_read_input_token_cost": 3e-7, "litellm_provider": "bedrock", "mode": "chat", "supports_function_calling": true, @@ -12487,15 +13178,15 @@ "max_tokens": 64000, "max_input_tokens": 200000, "max_output_tokens": 64000, - "input_cost_per_token": 3e-06, - "output_cost_per_token": 1.5e-05, + "input_cost_per_token": 0.000003, + "output_cost_per_token": 0.000015, "search_context_cost_per_query": { "search_context_size_low": 0.01, "search_context_size_medium": 0.01, "search_context_size_high": 0.01 }, - "cache_creation_input_token_cost": 3.75e-06, - "cache_read_input_token_cost": 3e-07, + "cache_creation_input_token_cost": 0.00000375, + "cache_read_input_token_cost": 3e-7, "litellm_provider": "bedrock_converse", "mode": "chat", "supports_function_calling": true, @@ -12513,8 +13204,8 @@ "max_tokens": 8192, "max_input_tokens": 200000, "max_output_tokens": 8192, - "input_cost_per_token": 2.5e-07, - "output_cost_per_token": 1.25e-06, + "input_cost_per_token": 2.5e-7, + "output_cost_per_token": 0.00000125, "litellm_provider": "bedrock", "mode": "chat", "supports_function_calling": true, @@ -12528,8 +13219,8 @@ "max_tokens": 4096, "max_input_tokens": 200000, "max_output_tokens": 4096, - "input_cost_per_token": 1.5e-05, - "output_cost_per_token": 7.5e-05, + "input_cost_per_token": 0.000015, + "output_cost_per_token": 0.000075, "litellm_provider": "bedrock", "mode": "chat", "supports_function_calling": true, @@ -12541,8 +13232,8 @@ "max_tokens": 8191, "max_input_tokens": 100000, "max_output_tokens": 8191, - "input_cost_per_token": 8e-06, - "output_cost_per_token": 2.4e-05, + "input_cost_per_token": 0.000008, + "output_cost_per_token": 0.000024, "litellm_provider": "bedrock", "mode": "chat" }, @@ -12550,8 +13241,8 @@ "max_tokens": 8191, "max_input_tokens": 100000, "max_output_tokens": 8191, - "input_cost_per_token": 
8e-06, - "output_cost_per_token": 2.4e-05, + "input_cost_per_token": 0.000008, + "output_cost_per_token": 0.000024, "litellm_provider": "bedrock", "mode": "chat", "supports_tool_choice": true @@ -12560,8 +13251,8 @@ "max_tokens": 8191, "max_input_tokens": 100000, "max_output_tokens": 8191, - "input_cost_per_token": 8e-06, - "output_cost_per_token": 2.4e-05, + "input_cost_per_token": 0.000008, + "output_cost_per_token": 0.000024, "litellm_provider": "bedrock", "mode": "chat", "supports_tool_choice": true @@ -12570,8 +13261,8 @@ "max_tokens": 8191, "max_input_tokens": 100000, "max_output_tokens": 8191, - "input_cost_per_token": 8e-06, - "output_cost_per_token": 2.4e-05, + "input_cost_per_token": 0.000008, + "output_cost_per_token": 0.000024, "litellm_provider": "bedrock", "mode": "chat", "supports_tool_choice": true @@ -12598,8 +13289,8 @@ "max_tokens": 8191, "max_input_tokens": 100000, "max_output_tokens": 8191, - "input_cost_per_token": 8e-06, - "output_cost_per_token": 2.4e-05, + "input_cost_per_token": 0.000008, + "output_cost_per_token": 0.000024, "litellm_provider": "bedrock", "mode": "chat" }, @@ -12661,8 +13352,8 @@ "max_tokens": 8191, "max_input_tokens": 100000, "max_output_tokens": 8191, - "input_cost_per_token": 8e-06, - "output_cost_per_token": 2.4e-05, + "input_cost_per_token": 0.000008, + "output_cost_per_token": 0.000024, "litellm_provider": "bedrock", "mode": "chat", "supports_tool_choice": true @@ -12671,8 +13362,8 @@ "max_tokens": 8191, "max_input_tokens": 100000, "max_output_tokens": 8191, - "input_cost_per_token": 8e-06, - "output_cost_per_token": 2.4e-05, + "input_cost_per_token": 0.000008, + "output_cost_per_token": 0.000024, "litellm_provider": "bedrock", "mode": "chat", "supports_tool_choice": true @@ -12681,8 +13372,8 @@ "max_tokens": 8191, "max_input_tokens": 100000, "max_output_tokens": 8191, - "input_cost_per_token": 8e-06, - "output_cost_per_token": 2.4e-05, + "input_cost_per_token": 0.000008, + "output_cost_per_token": 0.000024, 
"litellm_provider": "bedrock", "mode": "chat", "supports_tool_choice": true @@ -12691,8 +13382,8 @@ "max_tokens": 8191, "max_input_tokens": 100000, "max_output_tokens": 8191, - "input_cost_per_token": 8e-06, - "output_cost_per_token": 2.4e-05, + "input_cost_per_token": 0.000008, + "output_cost_per_token": 0.000024, "litellm_provider": "bedrock", "mode": "chat", "supports_tool_choice": true @@ -12721,8 +13412,8 @@ "max_tokens": 8191, "max_input_tokens": 100000, "max_output_tokens": 8191, - "input_cost_per_token": 8e-06, - "output_cost_per_token": 2.4e-05, + "input_cost_per_token": 0.000008, + "output_cost_per_token": 0.000024, "litellm_provider": "bedrock", "mode": "chat", "supports_tool_choice": true @@ -12791,8 +13482,8 @@ "max_tokens": 8191, "max_input_tokens": 100000, "max_output_tokens": 8191, - "input_cost_per_token": 8e-06, - "output_cost_per_token": 2.4e-05, + "input_cost_per_token": 0.000008, + "output_cost_per_token": 0.000024, "litellm_provider": "bedrock", "mode": "chat", "supports_tool_choice": true @@ -12801,8 +13492,8 @@ "max_tokens": 8191, "max_input_tokens": 100000, "max_output_tokens": 8191, - "input_cost_per_token": 8e-06, - "output_cost_per_token": 2.4e-05, + "input_cost_per_token": 0.000008, + "output_cost_per_token": 0.000024, "litellm_provider": "bedrock", "mode": "chat", "supports_tool_choice": true @@ -12811,8 +13502,8 @@ "max_tokens": 8191, "max_input_tokens": 100000, "max_output_tokens": 8191, - "input_cost_per_token": 8e-06, - "output_cost_per_token": 2.4e-05, + "input_cost_per_token": 0.000008, + "output_cost_per_token": 0.000024, "litellm_provider": "bedrock", "mode": "chat", "supports_tool_choice": true @@ -12821,8 +13512,8 @@ "max_tokens": 8191, "max_input_tokens": 100000, "max_output_tokens": 8191, - "input_cost_per_token": 8e-06, - "output_cost_per_token": 2.4e-05, + "input_cost_per_token": 0.000008, + "output_cost_per_token": 0.000024, "litellm_provider": "bedrock", "mode": "chat", "supports_tool_choice": true @@ -12851,8 +13542,8 
@@ "max_tokens": 8191, "max_input_tokens": 100000, "max_output_tokens": 8191, - "input_cost_per_token": 8e-06, - "output_cost_per_token": 2.4e-05, + "input_cost_per_token": 0.000008, + "output_cost_per_token": 0.000024, "litellm_provider": "bedrock", "mode": "chat", "supports_tool_choice": true @@ -12921,8 +13612,8 @@ "max_tokens": 8191, "max_input_tokens": 100000, "max_output_tokens": 8191, - "input_cost_per_token": 8e-07, - "output_cost_per_token": 2.4e-06, + "input_cost_per_token": 8e-7, + "output_cost_per_token": 0.0000024, "litellm_provider": "bedrock", "mode": "chat", "supports_tool_choice": true @@ -12931,8 +13622,8 @@ "max_tokens": 8191, "max_input_tokens": 100000, "max_output_tokens": 8191, - "input_cost_per_token": 8e-07, - "output_cost_per_token": 2.4e-06, + "input_cost_per_token": 8e-7, + "output_cost_per_token": 0.0000024, "litellm_provider": "bedrock", "mode": "chat", "supports_tool_choice": true @@ -12981,8 +13672,8 @@ "max_tokens": 8191, "max_input_tokens": 100000, "max_output_tokens": 8191, - "input_cost_per_token": 8e-07, - "output_cost_per_token": 2.4e-06, + "input_cost_per_token": 8e-7, + "output_cost_per_token": 0.0000024, "litellm_provider": "bedrock", "mode": "chat", "supports_tool_choice": true @@ -12991,8 +13682,8 @@ "max_tokens": 8191, "max_input_tokens": 100000, "max_output_tokens": 8191, - "input_cost_per_token": 2.23e-06, - "output_cost_per_token": 7.55e-06, + "input_cost_per_token": 0.00000223, + "output_cost_per_token": 0.00000755, "litellm_provider": "bedrock", "mode": "chat", "supports_tool_choice": true @@ -13021,8 +13712,8 @@ "max_tokens": 8191, "max_input_tokens": 100000, "max_output_tokens": 8191, - "input_cost_per_token": 2.48e-06, - "output_cost_per_token": 8.38e-06, + "input_cost_per_token": 0.00000248, + "output_cost_per_token": 0.00000838, "litellm_provider": "bedrock", "mode": "chat", "supports_tool_choice": true @@ -13054,9 +13745,9 @@ "max_query_tokens": 32000, "max_document_chunks_per_query": 100, 
"max_tokens_per_document_chunk": 512, - "input_cost_per_token": 0.0, + "input_cost_per_token": 0, "input_cost_per_query": 0.002, - "output_cost_per_token": 0.0, + "output_cost_per_token": 0, "litellm_provider": "bedrock", "mode": "rerank" }, @@ -13064,8 +13755,8 @@ "max_tokens": 4096, "max_input_tokens": 4096, "max_output_tokens": 4096, - "input_cost_per_token": 1.5e-06, - "output_cost_per_token": 2e-06, + "input_cost_per_token": 0.0000015, + "output_cost_per_token": 0.000002, "litellm_provider": "bedrock", "mode": "chat", "supports_tool_choice": true @@ -13094,8 +13785,8 @@ "max_tokens": 4096, "max_input_tokens": 4096, "max_output_tokens": 4096, - "input_cost_per_token": 3e-07, - "output_cost_per_token": 6e-07, + "input_cost_per_token": 3e-7, + "output_cost_per_token": 6e-7, "litellm_provider": "bedrock", "mode": "chat", "supports_tool_choice": true @@ -13124,8 +13815,8 @@ "max_tokens": 4096, "max_input_tokens": 128000, "max_output_tokens": 4096, - "input_cost_per_token": 3e-06, - "output_cost_per_token": 1.5e-05, + "input_cost_per_token": 0.000003, + "output_cost_per_token": 0.000015, "litellm_provider": "bedrock", "mode": "chat", "supports_tool_choice": true @@ -13134,8 +13825,8 @@ "max_tokens": 4096, "max_input_tokens": 128000, "max_output_tokens": 4096, - "input_cost_per_token": 5e-07, - "output_cost_per_token": 1.5e-06, + "input_cost_per_token": 5e-7, + "output_cost_per_token": 0.0000015, "litellm_provider": "bedrock", "mode": "chat", "supports_tool_choice": true @@ -13143,8 +13834,8 @@ "cohere.embed-english-v3": { "max_tokens": 512, "max_input_tokens": 512, - "input_cost_per_token": 1e-07, - "output_cost_per_token": 0.0, + "input_cost_per_token": 1e-7, + "output_cost_per_token": 0, "litellm_provider": "bedrock", "mode": "embedding", "supports_embedding_image_input": true @@ -13152,8 +13843,8 @@ "cohere.embed-multilingual-v3": { "max_tokens": 512, "max_input_tokens": 512, - "input_cost_per_token": 1e-07, - "output_cost_per_token": 0.0, + 
"input_cost_per_token": 1e-7, + "output_cost_per_token": 0, "litellm_provider": "bedrock", "mode": "embedding", "supports_embedding_image_input": true @@ -13162,8 +13853,8 @@ "max_tokens": 4096, "max_input_tokens": 128000, "max_output_tokens": 4096, - "input_cost_per_token": 1.35e-06, - "output_cost_per_token": 5.4e-06, + "input_cost_per_token": 0.00000135, + "output_cost_per_token": 0.0000054, "litellm_provider": "bedrock_converse", "mode": "chat", "supports_reasoning": true, @@ -13174,8 +13865,8 @@ "max_tokens": 4096, "max_input_tokens": 128000, "max_output_tokens": 4096, - "input_cost_per_token": 7.2e-07, - "output_cost_per_token": 7.2e-07, + "input_cost_per_token": 7.2e-7, + "output_cost_per_token": 7.2e-7, "litellm_provider": "bedrock_converse", "mode": "chat", "supports_function_calling": true, @@ -13185,8 +13876,8 @@ "max_tokens": 4096, "max_input_tokens": 4096, "max_output_tokens": 4096, - "input_cost_per_token": 7.5e-07, - "output_cost_per_token": 1e-06, + "input_cost_per_token": 7.5e-7, + "output_cost_per_token": 0.000001, "litellm_provider": "bedrock", "mode": "chat" }, @@ -13194,8 +13885,8 @@ "max_tokens": 4096, "max_input_tokens": 4096, "max_output_tokens": 4096, - "input_cost_per_token": 1.95e-06, - "output_cost_per_token": 2.56e-06, + "input_cost_per_token": 0.00000195, + "output_cost_per_token": 0.00000256, "litellm_provider": "bedrock", "mode": "chat" }, @@ -13203,8 +13894,8 @@ "max_tokens": 8192, "max_input_tokens": 8192, "max_output_tokens": 8192, - "input_cost_per_token": 3e-07, - "output_cost_per_token": 6e-07, + "input_cost_per_token": 3e-7, + "output_cost_per_token": 6e-7, "litellm_provider": "bedrock", "mode": "chat" }, @@ -13212,8 +13903,8 @@ "max_tokens": 8192, "max_input_tokens": 8192, "max_output_tokens": 8192, - "input_cost_per_token": 3e-07, - "output_cost_per_token": 6e-07, + "input_cost_per_token": 3e-7, + "output_cost_per_token": 6e-7, "litellm_provider": "bedrock", "mode": "chat" }, @@ -13221,8 +13912,8 @@ "max_tokens": 8192, 
"max_input_tokens": 8192, "max_output_tokens": 8192, - "input_cost_per_token": 3e-07, - "output_cost_per_token": 6e-07, + "input_cost_per_token": 3e-7, + "output_cost_per_token": 6e-7, "litellm_provider": "bedrock", "mode": "chat" }, @@ -13230,8 +13921,8 @@ "max_tokens": 8192, "max_input_tokens": 8192, "max_output_tokens": 8192, - "input_cost_per_token": 3.6e-07, - "output_cost_per_token": 7.2e-07, + "input_cost_per_token": 3.6e-7, + "output_cost_per_token": 7.2e-7, "litellm_provider": "bedrock", "mode": "chat" }, @@ -13239,8 +13930,8 @@ "max_tokens": 8192, "max_input_tokens": 8192, "max_output_tokens": 8192, - "input_cost_per_token": 3.5e-07, - "output_cost_per_token": 6.9e-07, + "input_cost_per_token": 3.5e-7, + "output_cost_per_token": 6.9e-7, "litellm_provider": "bedrock", "mode": "chat" }, @@ -13248,8 +13939,8 @@ "max_tokens": 8192, "max_input_tokens": 8192, "max_output_tokens": 8192, - "input_cost_per_token": 3.2e-07, - "output_cost_per_token": 6.5e-07, + "input_cost_per_token": 3.2e-7, + "output_cost_per_token": 6.5e-7, "litellm_provider": "bedrock", "mode": "chat" }, @@ -13257,8 +13948,8 @@ "max_tokens": 8192, "max_input_tokens": 8192, "max_output_tokens": 8192, - "input_cost_per_token": 3.9e-07, - "output_cost_per_token": 7.8e-07, + "input_cost_per_token": 3.9e-7, + "output_cost_per_token": 7.8e-7, "litellm_provider": "bedrock", "mode": "chat" }, @@ -13266,8 +13957,8 @@ "max_tokens": 8192, "max_input_tokens": 8192, "max_output_tokens": 8192, - "input_cost_per_token": 5e-07, - "output_cost_per_token": 1.01e-06, + "input_cost_per_token": 5e-7, + "output_cost_per_token": 0.00000101, "litellm_provider": "bedrock", "mode": "chat" }, @@ -13275,8 +13966,8 @@ "max_tokens": 8192, "max_input_tokens": 8192, "max_output_tokens": 8192, - "input_cost_per_token": 2.65e-06, - "output_cost_per_token": 3.5e-06, + "input_cost_per_token": 0.00000265, + "output_cost_per_token": 0.0000035, "litellm_provider": "bedrock", "mode": "chat" }, @@ -13284,8 +13975,8 @@ "max_tokens": 
8192, "max_input_tokens": 8192, "max_output_tokens": 8192, - "input_cost_per_token": 2.65e-06, - "output_cost_per_token": 3.5e-06, + "input_cost_per_token": 0.00000265, + "output_cost_per_token": 0.0000035, "litellm_provider": "bedrock", "mode": "chat" }, @@ -13293,8 +13984,8 @@ "max_tokens": 8192, "max_input_tokens": 8192, "max_output_tokens": 8192, - "input_cost_per_token": 2.65e-06, - "output_cost_per_token": 3.5e-06, + "input_cost_per_token": 0.00000265, + "output_cost_per_token": 0.0000035, "litellm_provider": "bedrock", "mode": "chat" }, @@ -13302,8 +13993,8 @@ "max_tokens": 8192, "max_input_tokens": 8192, "max_output_tokens": 8192, - "input_cost_per_token": 3.18e-06, - "output_cost_per_token": 4.2e-06, + "input_cost_per_token": 0.00000318, + "output_cost_per_token": 0.0000042, "litellm_provider": "bedrock", "mode": "chat" }, @@ -13311,8 +14002,8 @@ "max_tokens": 8192, "max_input_tokens": 8192, "max_output_tokens": 8192, - "input_cost_per_token": 3.05e-06, - "output_cost_per_token": 4.03e-06, + "input_cost_per_token": 0.00000305, + "output_cost_per_token": 0.00000403, "litellm_provider": "bedrock", "mode": "chat" }, @@ -13320,8 +14011,8 @@ "max_tokens": 8192, "max_input_tokens": 8192, "max_output_tokens": 8192, - "input_cost_per_token": 2.86e-06, - "output_cost_per_token": 3.78e-06, + "input_cost_per_token": 0.00000286, + "output_cost_per_token": 0.00000378, "litellm_provider": "bedrock", "mode": "chat" }, @@ -13329,8 +14020,8 @@ "max_tokens": 8192, "max_input_tokens": 8192, "max_output_tokens": 8192, - "input_cost_per_token": 3.45e-06, - "output_cost_per_token": 4.55e-06, + "input_cost_per_token": 0.00000345, + "output_cost_per_token": 0.00000455, "litellm_provider": "bedrock", "mode": "chat" }, @@ -13338,8 +14029,8 @@ "max_tokens": 8192, "max_input_tokens": 8192, "max_output_tokens": 8192, - "input_cost_per_token": 4.45e-06, - "output_cost_per_token": 5.88e-06, + "input_cost_per_token": 0.00000445, + "output_cost_per_token": 0.00000588, "litellm_provider": 
"bedrock", "mode": "chat" }, @@ -13347,8 +14038,8 @@ "max_tokens": 128000, "max_input_tokens": 128000, "max_output_tokens": 2048, - "input_cost_per_token": 2.2e-07, - "output_cost_per_token": 2.2e-07, + "input_cost_per_token": 2.2e-7, + "output_cost_per_token": 2.2e-7, "litellm_provider": "bedrock", "mode": "chat", "supports_function_calling": true, @@ -13358,8 +14049,8 @@ "max_tokens": 128000, "max_input_tokens": 128000, "max_output_tokens": 2048, - "input_cost_per_token": 2.2e-07, - "output_cost_per_token": 2.2e-07, + "input_cost_per_token": 2.2e-7, + "output_cost_per_token": 2.2e-7, "litellm_provider": "bedrock", "mode": "chat", "supports_function_calling": true, @@ -13369,8 +14060,8 @@ "max_tokens": 128000, "max_input_tokens": 128000, "max_output_tokens": 2048, - "input_cost_per_token": 9.9e-07, - "output_cost_per_token": 9.9e-07, + "input_cost_per_token": 9.9e-7, + "output_cost_per_token": 9.9e-7, "litellm_provider": "bedrock", "mode": "chat", "supports_function_calling": true, @@ -13380,8 +14071,8 @@ "max_tokens": 128000, "max_input_tokens": 128000, "max_output_tokens": 2048, - "input_cost_per_token": 9.9e-07, - "output_cost_per_token": 9.9e-07, + "input_cost_per_token": 9.9e-7, + "output_cost_per_token": 9.9e-7, "litellm_provider": "bedrock", "mode": "chat", "supports_function_calling": true, @@ -13391,8 +14082,8 @@ "max_tokens": 128000, "max_input_tokens": 128000, "max_output_tokens": 4096, - "input_cost_per_token": 5.32e-06, - "output_cost_per_token": 1.6e-05, + "input_cost_per_token": 0.00000532, + "output_cost_per_token": 0.000016, "litellm_provider": "bedrock", "mode": "chat", "supports_function_calling": true, @@ -13402,8 +14093,8 @@ "max_tokens": 128000, "max_input_tokens": 128000, "max_output_tokens": 4096, - "input_cost_per_token": 5.32e-06, - "output_cost_per_token": 1.6e-05, + "input_cost_per_token": 0.00000532, + "output_cost_per_token": 0.000016, "litellm_provider": "bedrock", "mode": "chat", "supports_function_calling": true, @@ -13413,8 
+14104,8 @@ "max_tokens": 128000, "max_input_tokens": 128000, "max_output_tokens": 4096, - "input_cost_per_token": 1e-07, - "output_cost_per_token": 1e-07, + "input_cost_per_token": 1e-7, + "output_cost_per_token": 1e-7, "litellm_provider": "bedrock", "mode": "chat", "supports_function_calling": true, @@ -13424,8 +14115,8 @@ "max_tokens": 128000, "max_input_tokens": 128000, "max_output_tokens": 4096, - "input_cost_per_token": 1e-07, - "output_cost_per_token": 1e-07, + "input_cost_per_token": 1e-7, + "output_cost_per_token": 1e-7, "litellm_provider": "bedrock", "mode": "chat", "supports_function_calling": true, @@ -13435,8 +14126,8 @@ "max_tokens": 128000, "max_input_tokens": 128000, "max_output_tokens": 4096, - "input_cost_per_token": 1.3e-07, - "output_cost_per_token": 1.3e-07, + "input_cost_per_token": 1.3e-7, + "output_cost_per_token": 1.3e-7, "litellm_provider": "bedrock", "mode": "chat", "supports_function_calling": true, @@ -13446,8 +14137,8 @@ "max_tokens": 128000, "max_input_tokens": 128000, "max_output_tokens": 4096, - "input_cost_per_token": 1.5e-07, - "output_cost_per_token": 1.5e-07, + "input_cost_per_token": 1.5e-7, + "output_cost_per_token": 1.5e-7, "litellm_provider": "bedrock", "mode": "chat", "supports_function_calling": true, @@ -13457,8 +14148,8 @@ "max_tokens": 128000, "max_input_tokens": 128000, "max_output_tokens": 4096, - "input_cost_per_token": 1.5e-07, - "output_cost_per_token": 1.5e-07, + "input_cost_per_token": 1.5e-7, + "output_cost_per_token": 1.5e-7, "litellm_provider": "bedrock", "mode": "chat", "supports_function_calling": true, @@ -13468,8 +14159,8 @@ "max_tokens": 128000, "max_input_tokens": 128000, "max_output_tokens": 4096, - "input_cost_per_token": 1.9e-07, - "output_cost_per_token": 1.9e-07, + "input_cost_per_token": 1.9e-7, + "output_cost_per_token": 1.9e-7, "litellm_provider": "bedrock", "mode": "chat", "supports_function_calling": true, @@ -13479,8 +14170,8 @@ "max_tokens": 128000, "max_input_tokens": 128000, 
"max_output_tokens": 4096, - "input_cost_per_token": 3.5e-07, - "output_cost_per_token": 3.5e-07, + "input_cost_per_token": 3.5e-7, + "output_cost_per_token": 3.5e-7, "litellm_provider": "bedrock", "mode": "chat", "supports_function_calling": true, @@ -13491,8 +14182,8 @@ "max_tokens": 128000, "max_input_tokens": 128000, "max_output_tokens": 4096, - "input_cost_per_token": 3.5e-07, - "output_cost_per_token": 3.5e-07, + "input_cost_per_token": 3.5e-7, + "output_cost_per_token": 3.5e-7, "litellm_provider": "bedrock", "mode": "chat", "supports_function_calling": true, @@ -13503,8 +14194,8 @@ "max_tokens": 128000, "max_input_tokens": 128000, "max_output_tokens": 4096, - "input_cost_per_token": 2e-06, - "output_cost_per_token": 2e-06, + "input_cost_per_token": 0.000002, + "output_cost_per_token": 0.000002, "litellm_provider": "bedrock", "mode": "chat", "supports_function_calling": true, @@ -13515,8 +14206,8 @@ "max_tokens": 128000, "max_input_tokens": 128000, "max_output_tokens": 4096, - "input_cost_per_token": 2e-06, - "output_cost_per_token": 2e-06, + "input_cost_per_token": 0.000002, + "output_cost_per_token": 0.000002, "litellm_provider": "bedrock", "mode": "chat", "supports_function_calling": true, @@ -13527,8 +14218,8 @@ "max_tokens": 4096, "max_input_tokens": 128000, "max_output_tokens": 4096, - "input_cost_per_token": 7.2e-07, - "output_cost_per_token": 7.2e-07, + "input_cost_per_token": 7.2e-7, + "output_cost_per_token": 7.2e-7, "litellm_provider": "bedrock_converse", "mode": "chat", "supports_function_calling": true, @@ -13538,10 +14229,10 @@ "max_tokens": 4096, "max_input_tokens": 128000, "max_output_tokens": 4096, - "input_cost_per_token": 2.4e-07, - "input_cost_per_token_batches": 1.2e-07, - "output_cost_per_token": 9.7e-07, - "output_cost_per_token_batches": 4.85e-07, + "input_cost_per_token": 2.4e-7, + "input_cost_per_token_batches": 1.2e-7, + "output_cost_per_token": 9.7e-7, + "output_cost_per_token_batches": 4.85e-7, "litellm_provider": 
"bedrock_converse", "mode": "chat", "supports_function_calling": true, @@ -13559,10 +14250,10 @@ "max_tokens": 4096, "max_input_tokens": 128000, "max_output_tokens": 4096, - "input_cost_per_token": 2.4e-07, - "input_cost_per_token_batches": 1.2e-07, - "output_cost_per_token": 9.7e-07, - "output_cost_per_token_batches": 4.85e-07, + "input_cost_per_token": 2.4e-7, + "input_cost_per_token_batches": 1.2e-7, + "output_cost_per_token": 9.7e-7, + "output_cost_per_token_batches": 4.85e-7, "litellm_provider": "bedrock_converse", "mode": "chat", "supports_function_calling": true, @@ -13580,10 +14271,10 @@ "max_tokens": 4096, "max_input_tokens": 128000, "max_output_tokens": 4096, - "input_cost_per_token": 1.7e-07, - "input_cost_per_token_batches": 8.5e-08, - "output_cost_per_token": 6.6e-07, - "output_cost_per_token_batches": 3.3e-07, + "input_cost_per_token": 1.7e-7, + "input_cost_per_token_batches": 8.5e-8, + "output_cost_per_token": 6.6e-7, + "output_cost_per_token_batches": 3.3e-7, "litellm_provider": "bedrock_converse", "mode": "chat", "supports_function_calling": true, @@ -13601,10 +14292,10 @@ "max_tokens": 4096, "max_input_tokens": 128000, "max_output_tokens": 4096, - "input_cost_per_token": 1.7e-07, - "input_cost_per_token_batches": 8.5e-08, - "output_cost_per_token": 6.6e-07, - "output_cost_per_token_batches": 3.3e-07, + "input_cost_per_token": 1.7e-7, + "input_cost_per_token_batches": 8.5e-8, + "output_cost_per_token": 6.6e-7, + "output_cost_per_token_batches": 3.3e-7, "litellm_provider": "bedrock_converse", "mode": "chat", "supports_function_calling": true, @@ -13706,8 +14397,8 @@ "max_tokens": 4096, "max_input_tokens": 4096, "max_output_tokens": 4096, - "input_cost_per_token": 0.0, - "output_cost_per_token": 0.0, + "input_cost_per_token": 0, + "output_cost_per_token": 0, "litellm_provider": "sagemaker", "mode": "completion" }, @@ -13715,8 +14406,8 @@ "max_tokens": 4096, "max_input_tokens": 4096, "max_output_tokens": 4096, - "input_cost_per_token": 0.0, - 
"output_cost_per_token": 0.0, + "input_cost_per_token": 0, + "output_cost_per_token": 0, "litellm_provider": "sagemaker", "mode": "chat" }, @@ -13724,8 +14415,8 @@ "max_tokens": 4096, "max_input_tokens": 4096, "max_output_tokens": 4096, - "input_cost_per_token": 0.0, - "output_cost_per_token": 0.0, + "input_cost_per_token": 0, + "output_cost_per_token": 0, "litellm_provider": "sagemaker", "mode": "completion" }, @@ -13733,8 +14424,8 @@ "max_tokens": 4096, "max_input_tokens": 4096, "max_output_tokens": 4096, - "input_cost_per_token": 0.0, - "output_cost_per_token": 0.0, + "input_cost_per_token": 0, + "output_cost_per_token": 0, "litellm_provider": "sagemaker", "mode": "chat" }, @@ -13742,8 +14433,8 @@ "max_tokens": 4096, "max_input_tokens": 4096, "max_output_tokens": 4096, - "input_cost_per_token": 0.0, - "output_cost_per_token": 0.0, + "input_cost_per_token": 0, + "output_cost_per_token": 0, "litellm_provider": "sagemaker", "mode": "completion" }, @@ -13751,63 +14442,63 @@ "max_tokens": 4096, "max_input_tokens": 4096, "max_output_tokens": 4096, - "input_cost_per_token": 0.0, - "output_cost_per_token": 0.0, + "input_cost_per_token": 0, + "output_cost_per_token": 0, "litellm_provider": "sagemaker", "mode": "chat" }, "together-ai-up-to-4b": { - "input_cost_per_token": 1e-07, - "output_cost_per_token": 1e-07, + "input_cost_per_token": 1e-7, + "output_cost_per_token": 1e-7, "litellm_provider": "together_ai", "mode": "chat" }, "together-ai-4.1b-8b": { - "input_cost_per_token": 2e-07, - "output_cost_per_token": 2e-07, + "input_cost_per_token": 2e-7, + "output_cost_per_token": 2e-7, "litellm_provider": "together_ai", "mode": "chat" }, "together-ai-8.1b-21b": { "max_tokens": 1000, - "input_cost_per_token": 3e-07, - "output_cost_per_token": 3e-07, + "input_cost_per_token": 3e-7, + "output_cost_per_token": 3e-7, "litellm_provider": "together_ai", "mode": "chat" }, "together-ai-21.1b-41b": { - "input_cost_per_token": 8e-07, - "output_cost_per_token": 8e-07, + 
"input_cost_per_token": 8e-7, + "output_cost_per_token": 8e-7, "litellm_provider": "together_ai", "mode": "chat" }, "together-ai-41.1b-80b": { - "input_cost_per_token": 9e-07, - "output_cost_per_token": 9e-07, + "input_cost_per_token": 9e-7, + "output_cost_per_token": 9e-7, "litellm_provider": "together_ai", "mode": "chat" }, "together-ai-81.1b-110b": { - "input_cost_per_token": 1.8e-06, - "output_cost_per_token": 1.8e-06, + "input_cost_per_token": 0.0000018, + "output_cost_per_token": 0.0000018, "litellm_provider": "together_ai", "mode": "chat" }, "together-ai-embedding-up-to-150m": { - "input_cost_per_token": 8e-09, - "output_cost_per_token": 0.0, + "input_cost_per_token": 8e-9, + "output_cost_per_token": 0, "litellm_provider": "together_ai", "mode": "embedding" }, "together-ai-embedding-151m-to-350m": { - "input_cost_per_token": 1.6e-08, - "output_cost_per_token": 0.0, + "input_cost_per_token": 1.6e-8, + "output_cost_per_token": 0, "litellm_provider": "together_ai", "mode": "embedding" }, "together_ai/meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo": { - "input_cost_per_token": 1.8e-07, - "output_cost_per_token": 1.8e-07, + "input_cost_per_token": 1.8e-7, + "output_cost_per_token": 1.8e-7, "litellm_provider": "together_ai", "supports_function_calling": true, "supports_parallel_function_calling": true, @@ -13816,8 +14507,8 @@ "supports_tool_choice": true }, "together_ai/meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo": { - "input_cost_per_token": 8.8e-07, - "output_cost_per_token": 8.8e-07, + "input_cost_per_token": 8.8e-7, + "output_cost_per_token": 8.8e-7, "litellm_provider": "together_ai", "supports_function_calling": true, "supports_parallel_function_calling": true, @@ -13826,8 +14517,8 @@ "supports_tool_choice": true }, "together_ai/meta-llama/Meta-Llama-3.1-405B-Instruct-Turbo": { - "input_cost_per_token": 3.5e-06, - "output_cost_per_token": 3.5e-06, + "input_cost_per_token": 0.0000035, + "output_cost_per_token": 0.0000035, "litellm_provider": "together_ai", 
"supports_function_calling": true, "supports_parallel_function_calling": true, @@ -13835,8 +14526,8 @@ "supports_tool_choice": true }, "together_ai/meta-llama/Llama-3.3-70B-Instruct-Turbo": { - "input_cost_per_token": 8.8e-07, - "output_cost_per_token": 8.8e-07, + "input_cost_per_token": 8.8e-7, + "output_cost_per_token": 8.8e-7, "litellm_provider": "together_ai", "supports_function_calling": true, "supports_parallel_function_calling": true, @@ -13855,8 +14546,8 @@ "supports_tool_choice": true }, "together_ai/mistralai/Mixtral-8x7B-Instruct-v0.1": { - "input_cost_per_token": 6e-07, - "output_cost_per_token": 6e-07, + "input_cost_per_token": 6e-7, + "output_cost_per_token": 6e-7, "litellm_provider": "together_ai", "supports_function_calling": true, "supports_parallel_function_calling": true, @@ -13880,8 +14571,8 @@ "supports_tool_choice": true }, "together_ai/meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8": { - "input_cost_per_token": 2.7e-07, - "output_cost_per_token": 8.5e-07, + "input_cost_per_token": 2.7e-7, + "output_cost_per_token": 8.5e-7, "litellm_provider": "together_ai", "supports_function_calling": true, "supports_parallel_function_calling": true, @@ -13889,8 +14580,8 @@ "supports_tool_choice": true }, "together_ai/meta-llama/Llama-4-Scout-17B-16E-Instruct": { - "input_cost_per_token": 1.8e-07, - "output_cost_per_token": 5.9e-07, + "input_cost_per_token": 1.8e-7, + "output_cost_per_token": 5.9e-7, "litellm_provider": "together_ai", "supports_function_calling": true, "supports_parallel_function_calling": true, @@ -13919,8 +14610,8 @@ "supports_tool_choice": true }, "together_ai/deepseek-ai/DeepSeek-V3": { - "input_cost_per_token": 1.25e-06, - "output_cost_per_token": 1.25e-06, + "input_cost_per_token": 0.00000125, + "output_cost_per_token": 0.00000125, "max_tokens": 8192, "max_input_tokens": 65536, "max_output_tokens": 8192, @@ -13931,8 +14622,8 @@ "supports_tool_choice": true }, "together_ai/deepseek-ai/DeepSeek-R1": { - "input_cost_per_token": 3e-06, 
- "output_cost_per_token": 7e-06, + "input_cost_per_token": 0.000003, + "output_cost_per_token": 0.000007, "max_tokens": 20480, "max_input_tokens": 128000, "max_output_tokens": 20480, @@ -13950,8 +14641,8 @@ "supports_tool_choice": true }, "together_ai/moonshotai/Kimi-K2-Instruct": { - "input_cost_per_token": 1e-06, - "output_cost_per_token": 3e-06, + "input_cost_per_token": 0.000001, + "output_cost_per_token": 0.000003, "litellm_provider": "together_ai", "supports_function_calling": true, "supports_tool_choice": true, @@ -13963,8 +14654,8 @@ "max_tokens": 8192, "max_input_tokens": 8192, "max_output_tokens": 8192, - "input_cost_per_token": 0.0, - "output_cost_per_token": 0.0, + "input_cost_per_token": 0, + "output_cost_per_token": 0, "litellm_provider": "ollama", "mode": "completion" }, @@ -13972,8 +14663,8 @@ "max_tokens": 32768, "max_input_tokens": 32768, "max_output_tokens": 8192, - "input_cost_per_token": 0.0, - "output_cost_per_token": 0.0, + "input_cost_per_token": 0, + "output_cost_per_token": 0, "litellm_provider": "ollama", "mode": "chat", "supports_function_calling": false @@ -13982,8 +14673,8 @@ "max_tokens": 32768, "max_input_tokens": 32768, "max_output_tokens": 8192, - "input_cost_per_token": 0.0, - "output_cost_per_token": 0.0, + "input_cost_per_token": 0, + "output_cost_per_token": 0, "litellm_provider": "ollama", "mode": "chat", "supports_function_calling": true @@ -13992,8 +14683,8 @@ "max_tokens": 8192, "max_input_tokens": 8192, "max_output_tokens": 8192, - "input_cost_per_token": 0.0, - "output_cost_per_token": 0.0, + "input_cost_per_token": 0, + "output_cost_per_token": 0, "litellm_provider": "ollama", "mode": "completion", "supports_function_calling": true @@ -14002,8 +14693,8 @@ "max_tokens": 32768, "max_input_tokens": 32768, "max_output_tokens": 8192, - "input_cost_per_token": 0.0, - "output_cost_per_token": 0.0, + "input_cost_per_token": 0, + "output_cost_per_token": 0, "litellm_provider": "ollama", "mode": "chat", 
"supports_function_calling": true @@ -14012,8 +14703,8 @@ "max_tokens": 8192, "max_input_tokens": 8192, "max_output_tokens": 8192, - "input_cost_per_token": 0.0, - "output_cost_per_token": 0.0, + "input_cost_per_token": 0, + "output_cost_per_token": 0, "litellm_provider": "ollama", "mode": "completion", "supports_function_calling": true @@ -14022,8 +14713,8 @@ "max_tokens": 32768, "max_input_tokens": 32768, "max_output_tokens": 8192, - "input_cost_per_token": 0.0, - "output_cost_per_token": 0.0, + "input_cost_per_token": 0, + "output_cost_per_token": 0, "litellm_provider": "ollama", "mode": "chat", "supports_function_calling": true @@ -14032,8 +14723,8 @@ "max_tokens": 4096, "max_input_tokens": 4096, "max_output_tokens": 4096, - "input_cost_per_token": 0.0, - "output_cost_per_token": 0.0, + "input_cost_per_token": 0, + "output_cost_per_token": 0, "litellm_provider": "ollama", "mode": "chat" }, @@ -14041,8 +14732,8 @@ "max_tokens": 4096, "max_input_tokens": 4096, "max_output_tokens": 4096, - "input_cost_per_token": 0.0, - "output_cost_per_token": 0.0, + "input_cost_per_token": 0, + "output_cost_per_token": 0, "litellm_provider": "ollama", "mode": "chat" }, @@ -14050,8 +14741,8 @@ "max_tokens": 4096, "max_input_tokens": 4096, "max_output_tokens": 4096, - "input_cost_per_token": 0.0, - "output_cost_per_token": 0.0, + "input_cost_per_token": 0, + "output_cost_per_token": 0, "litellm_provider": "ollama", "mode": "chat" }, @@ -14059,8 +14750,8 @@ "max_tokens": 4096, "max_input_tokens": 4096, "max_output_tokens": 4096, - "input_cost_per_token": 0.0, - "output_cost_per_token": 0.0, + "input_cost_per_token": 0, + "output_cost_per_token": 0, "litellm_provider": "ollama", "mode": "chat" }, @@ -14068,8 +14759,8 @@ "max_tokens": 4096, "max_input_tokens": 4096, "max_output_tokens": 4096, - "input_cost_per_token": 0.0, - "output_cost_per_token": 0.0, + "input_cost_per_token": 0, + "output_cost_per_token": 0, "litellm_provider": "ollama", "mode": "completion" }, @@ -14077,8 
+14768,8 @@ "max_tokens": 8192, "max_input_tokens": 8192, "max_output_tokens": 8192, - "input_cost_per_token": 0.0, - "output_cost_per_token": 0.0, + "input_cost_per_token": 0, + "output_cost_per_token": 0, "litellm_provider": "ollama", "mode": "chat" }, @@ -14086,8 +14777,8 @@ "max_tokens": 8192, "max_input_tokens": 8192, "max_output_tokens": 8192, - "input_cost_per_token": 0.0, - "output_cost_per_token": 0.0, + "input_cost_per_token": 0, + "output_cost_per_token": 0, "litellm_provider": "ollama", "mode": "chat" }, @@ -14095,8 +14786,8 @@ "max_tokens": 8192, "max_input_tokens": 8192, "max_output_tokens": 8192, - "input_cost_per_token": 0.0, - "output_cost_per_token": 0.0, + "input_cost_per_token": 0, + "output_cost_per_token": 0, "litellm_provider": "ollama", "mode": "chat" }, @@ -14104,8 +14795,8 @@ "max_tokens": 32768, "max_input_tokens": 8192, "max_output_tokens": 8192, - "input_cost_per_token": 0.0, - "output_cost_per_token": 0.0, + "input_cost_per_token": 0, + "output_cost_per_token": 0, "litellm_provider": "ollama", "mode": "chat", "supports_function_calling": true @@ -14114,8 +14805,8 @@ "max_tokens": 65536, "max_input_tokens": 65536, "max_output_tokens": 8192, - "input_cost_per_token": 0.0, - "output_cost_per_token": 0.0, + "input_cost_per_token": 0, + "output_cost_per_token": 0, "litellm_provider": "ollama", "mode": "chat", "supports_function_calling": true @@ -14124,8 +14815,8 @@ "max_tokens": 8192, "max_input_tokens": 8192, "max_output_tokens": 8192, - "input_cost_per_token": 0.0, - "output_cost_per_token": 0.0, + "input_cost_per_token": 0, + "output_cost_per_token": 0, "litellm_provider": "ollama", "mode": "completion", "supports_function_calling": true @@ -14134,8 +14825,8 @@ "max_tokens": 8192, "max_input_tokens": 8192, "max_output_tokens": 8192, - "input_cost_per_token": 0.0, - "output_cost_per_token": 0.0, + "input_cost_per_token": 0, + "output_cost_per_token": 0, "litellm_provider": "ollama", "mode": "chat", "supports_function_calling": true @@ 
-14144,8 +14835,8 @@ "max_tokens": 32768, "max_input_tokens": 32768, "max_output_tokens": 32768, - "input_cost_per_token": 0.0, - "output_cost_per_token": 0.0, + "input_cost_per_token": 0, + "output_cost_per_token": 0, "litellm_provider": "ollama", "mode": "chat", "supports_function_calling": true @@ -14154,8 +14845,8 @@ "max_tokens": 32768, "max_input_tokens": 32768, "max_output_tokens": 32768, - "input_cost_per_token": 0.0, - "output_cost_per_token": 0.0, + "input_cost_per_token": 0, + "output_cost_per_token": 0, "litellm_provider": "ollama", "mode": "chat", "supports_function_calling": true @@ -14164,8 +14855,8 @@ "max_tokens": 65536, "max_input_tokens": 65536, "max_output_tokens": 65536, - "input_cost_per_token": 0.0, - "output_cost_per_token": 0.0, + "input_cost_per_token": 0, + "output_cost_per_token": 0, "litellm_provider": "ollama", "mode": "chat", "supports_function_calling": true @@ -14174,8 +14865,8 @@ "max_tokens": 4096, "max_input_tokens": 4096, "max_output_tokens": 4096, - "input_cost_per_token": 0.0, - "output_cost_per_token": 0.0, + "input_cost_per_token": 0, + "output_cost_per_token": 0, "litellm_provider": "ollama", "mode": "completion" }, @@ -14183,8 +14874,8 @@ "max_tokens": 4096, "max_input_tokens": 4096, "max_output_tokens": 4096, - "input_cost_per_token": 0.0, - "output_cost_per_token": 0.0, + "input_cost_per_token": 0, + "output_cost_per_token": 0, "litellm_provider": "ollama", "mode": "completion" }, @@ -14192,8 +14883,8 @@ "max_tokens": 2048, "max_input_tokens": 2048, "max_output_tokens": 2048, - "input_cost_per_token": 0.0, - "output_cost_per_token": 0.0, + "input_cost_per_token": 0, + "output_cost_per_token": 0, "litellm_provider": "ollama", "mode": "completion" }, @@ -14201,8 +14892,8 @@ "max_tokens": 4096, "max_input_tokens": 4096, "max_output_tokens": 4096, - "input_cost_per_token": 7e-07, - "output_cost_per_token": 9e-07, + "input_cost_per_token": 7e-7, + "output_cost_per_token": 9e-7, "litellm_provider": "deepinfra", "mode": "chat", 
"supports_tool_choice": true @@ -14211,8 +14902,8 @@ "max_tokens": 4096, "max_input_tokens": 4096, "max_output_tokens": 4096, - "input_cost_per_token": 2.2e-07, - "output_cost_per_token": 2.2e-07, + "input_cost_per_token": 2.2e-7, + "output_cost_per_token": 2.2e-7, "litellm_provider": "deepinfra", "mode": "chat", "supports_tool_choice": true @@ -14221,8 +14912,8 @@ "max_tokens": 8191, "max_input_tokens": 32768, "max_output_tokens": 8191, - "input_cost_per_token": 1.3e-07, - "output_cost_per_token": 1.3e-07, + "input_cost_per_token": 1.3e-7, + "output_cost_per_token": 1.3e-7, "litellm_provider": "deepinfra", "mode": "chat", "supports_tool_choice": true @@ -14231,8 +14922,8 @@ "max_tokens": 4096, "max_input_tokens": 4096, "max_output_tokens": 4096, - "input_cost_per_token": 7e-07, - "output_cost_per_token": 9e-07, + "input_cost_per_token": 7e-7, + "output_cost_per_token": 9e-7, "litellm_provider": "deepinfra", "mode": "chat", "supports_tool_choice": true @@ -14241,8 +14932,8 @@ "max_tokens": 8191, "max_input_tokens": 32768, "max_output_tokens": 8191, - "input_cost_per_token": 2.7e-07, - "output_cost_per_token": 2.7e-07, + "input_cost_per_token": 2.7e-7, + "output_cost_per_token": 2.7e-7, "litellm_provider": "deepinfra", "mode": "chat", "supports_tool_choice": true @@ -14251,8 +14942,8 @@ "max_tokens": 4096, "max_input_tokens": 4096, "max_output_tokens": 4096, - "input_cost_per_token": 6e-07, - "output_cost_per_token": 6e-07, + "input_cost_per_token": 6e-7, + "output_cost_per_token": 6e-7, "litellm_provider": "deepinfra", "mode": "chat", "supports_tool_choice": true @@ -14261,8 +14952,8 @@ "max_tokens": 4096, "max_input_tokens": 32000, "max_output_tokens": 4096, - "input_cost_per_token": 2.7e-07, - "output_cost_per_token": 2.7e-07, + "input_cost_per_token": 2.7e-7, + "output_cost_per_token": 2.7e-7, "litellm_provider": "deepinfra", "mode": "completion" }, @@ -14270,8 +14961,8 @@ "max_tokens": 4096, "max_input_tokens": 16384, "max_output_tokens": 4096, - 
"input_cost_per_token": 6e-07, - "output_cost_per_token": 6e-07, + "input_cost_per_token": 6e-7, + "output_cost_per_token": 6e-7, "litellm_provider": "deepinfra", "mode": "chat", "supports_tool_choice": true @@ -14280,8 +14971,8 @@ "max_tokens": 8191, "max_input_tokens": 32768, "max_output_tokens": 8191, - "input_cost_per_token": 2.7e-07, - "output_cost_per_token": 2.7e-07, + "input_cost_per_token": 2.7e-7, + "output_cost_per_token": 2.7e-7, "litellm_provider": "deepinfra", "mode": "chat", "supports_tool_choice": true @@ -14290,8 +14981,8 @@ "max_tokens": 4096, "max_input_tokens": 4096, "max_output_tokens": 4096, - "input_cost_per_token": 7e-07, - "output_cost_per_token": 9e-07, + "input_cost_per_token": 7e-7, + "output_cost_per_token": 9e-7, "litellm_provider": "deepinfra", "mode": "chat", "supports_tool_choice": true @@ -14300,8 +14991,8 @@ "max_tokens": 4096, "max_input_tokens": 4096, "max_output_tokens": 4096, - "input_cost_per_token": 6e-07, - "output_cost_per_token": 6e-07, + "input_cost_per_token": 6e-7, + "output_cost_per_token": 6e-7, "litellm_provider": "deepinfra", "mode": "chat", "supports_tool_choice": true @@ -14310,8 +15001,8 @@ "max_tokens": 4096, "max_input_tokens": 200000, "max_output_tokens": 4096, - "input_cost_per_token": 1.3e-07, - "output_cost_per_token": 1.3e-07, + "input_cost_per_token": 1.3e-7, + "output_cost_per_token": 1.3e-7, "litellm_provider": "deepinfra", "mode": "completion" }, @@ -14319,8 +15010,8 @@ "max_tokens": 4096, "max_input_tokens": 4096, "max_output_tokens": 4096, - "input_cost_per_token": 7e-07, - "output_cost_per_token": 9e-07, + "input_cost_per_token": 7e-7, + "output_cost_per_token": 9e-7, "litellm_provider": "deepinfra", "mode": "chat", "supports_tool_choice": true @@ -14329,8 +15020,8 @@ "max_tokens": 4096, "max_input_tokens": 4096, "max_output_tokens": 4096, - "input_cost_per_token": 2.2e-07, - "output_cost_per_token": 2.2e-07, + "input_cost_per_token": 2.2e-7, + "output_cost_per_token": 2.2e-7, "litellm_provider": 
"deepinfra", "mode": "chat", "supports_tool_choice": true @@ -14339,8 +15030,8 @@ "max_tokens": 8191, "max_input_tokens": 32768, "max_output_tokens": 8191, - "input_cost_per_token": 2e-07, - "output_cost_per_token": 2e-07, + "input_cost_per_token": 2e-7, + "output_cost_per_token": 2e-7, "litellm_provider": "deepinfra", "mode": "chat", "supports_tool_choice": true @@ -14349,8 +15040,8 @@ "max_tokens": 4096, "max_input_tokens": 4096, "max_output_tokens": 4096, - "input_cost_per_token": 1.3e-07, - "output_cost_per_token": 1.3e-07, + "input_cost_per_token": 1.3e-7, + "output_cost_per_token": 1.3e-7, "litellm_provider": "deepinfra", "mode": "chat", "supports_tool_choice": true @@ -14359,8 +15050,8 @@ "max_tokens": 8191, "max_input_tokens": 8191, "max_output_tokens": 4096, - "input_cost_per_token": 8e-08, - "output_cost_per_token": 8e-08, + "input_cost_per_token": 8e-8, + "output_cost_per_token": 8e-8, "litellm_provider": "deepinfra", "mode": "chat", "supports_tool_choice": true @@ -14369,8 +15060,8 @@ "max_tokens": 8191, "max_input_tokens": 8191, "max_output_tokens": 4096, - "input_cost_per_token": 5.9e-07, - "output_cost_per_token": 7.9e-07, + "input_cost_per_token": 5.9e-7, + "output_cost_per_token": 7.9e-7, "litellm_provider": "deepinfra", "mode": "chat", "supports_tool_choice": true @@ -14379,8 +15070,8 @@ "max_tokens": 32768, "max_input_tokens": 32768, "max_output_tokens": 32768, - "input_cost_per_token": 9e-07, - "output_cost_per_token": 9e-07, + "input_cost_per_token": 9e-7, + "output_cost_per_token": 9e-7, "litellm_provider": "deepinfra", "mode": "chat", "supports_function_calling": true, @@ -14391,8 +15082,8 @@ "max_tokens": 4096, "max_input_tokens": 200000, "max_output_tokens": 4096, - "input_cost_per_token": 6e-07, - "output_cost_per_token": 6e-07, + "input_cost_per_token": 6e-7, + "output_cost_per_token": 6e-7, "litellm_provider": "deepinfra", "mode": "completion" }, @@ -14400,8 +15091,8 @@ "max_tokens": 4096, "max_input_tokens": 4096, "max_output_tokens": 
4096, - "input_cost_per_token": 1.3e-07, - "output_cost_per_token": 1.3e-07, + "input_cost_per_token": 1.3e-7, + "output_cost_per_token": 1.3e-7, "litellm_provider": "deepinfra", "mode": "chat", "supports_tool_choice": true @@ -14410,8 +15101,8 @@ "max_tokens": 16384, "max_input_tokens": 16384, "max_output_tokens": 16384, - "input_cost_per_token": 3.5e-07, - "output_cost_per_token": 1.4e-06, + "input_cost_per_token": 3.5e-7, + "output_cost_per_token": 0.0000014, "litellm_provider": "perplexity", "mode": "chat" }, @@ -14419,8 +15110,8 @@ "max_tokens": 16384, "max_input_tokens": 16384, "max_output_tokens": 16384, - "input_cost_per_token": 7e-07, - "output_cost_per_token": 2.8e-06, + "input_cost_per_token": 7e-7, + "output_cost_per_token": 0.0000028, "litellm_provider": "perplexity", "mode": "chat" }, @@ -14428,8 +15119,8 @@ "max_tokens": 131072, "max_input_tokens": 131072, "max_output_tokens": 131072, - "input_cost_per_token": 1e-06, - "output_cost_per_token": 1e-06, + "input_cost_per_token": 0.000001, + "output_cost_per_token": 0.000001, "litellm_provider": "perplexity", "mode": "chat" }, @@ -14437,8 +15128,8 @@ "max_tokens": 131072, "max_input_tokens": 131072, "max_output_tokens": 131072, - "input_cost_per_token": 2e-07, - "output_cost_per_token": 2e-07, + "input_cost_per_token": 2e-7, + "output_cost_per_token": 2e-7, "litellm_provider": "perplexity", "mode": "chat" }, @@ -14446,8 +15137,8 @@ "max_tokens": 127072, "max_input_tokens": 127072, "max_output_tokens": 127072, - "input_cost_per_token": 5e-06, - "output_cost_per_token": 5e-06, + "input_cost_per_token": 0.000005, + "output_cost_per_token": 0.000005, "litellm_provider": "perplexity", "mode": "chat", "deprecation_date": "2025-02-22" @@ -14456,8 +15147,8 @@ "max_tokens": 127072, "max_input_tokens": 127072, "max_output_tokens": 127072, - "input_cost_per_token": 1e-06, - "output_cost_per_token": 1e-06, + "input_cost_per_token": 0.000001, + "output_cost_per_token": 0.000001, "litellm_provider": "perplexity", 
"mode": "chat", "deprecation_date": "2025-02-22" @@ -14466,8 +15157,8 @@ "max_tokens": 131072, "max_input_tokens": 131072, "max_output_tokens": 131072, - "input_cost_per_token": 1e-06, - "output_cost_per_token": 1e-06, + "input_cost_per_token": 0.000001, + "output_cost_per_token": 0.000001, "litellm_provider": "perplexity", "mode": "chat", "deprecation_date": "2025-02-22" @@ -14476,8 +15167,8 @@ "max_tokens": 131072, "max_input_tokens": 131072, "max_output_tokens": 131072, - "input_cost_per_token": 2e-07, - "output_cost_per_token": 2e-07, + "input_cost_per_token": 2e-7, + "output_cost_per_token": 2e-7, "litellm_provider": "perplexity", "mode": "chat", "deprecation_date": "2025-02-22" @@ -14486,8 +15177,8 @@ "max_tokens": 127072, "max_input_tokens": 127072, "max_output_tokens": 127072, - "input_cost_per_token": 2e-07, - "output_cost_per_token": 2e-07, + "input_cost_per_token": 2e-7, + "output_cost_per_token": 2e-7, "litellm_provider": "perplexity", "mode": "chat", "deprecation_date": "2025-02-22" @@ -14496,8 +15187,8 @@ "max_tokens": 8192, "max_input_tokens": 8192, "max_output_tokens": 8192, - "input_cost_per_token": 7e-08, - "output_cost_per_token": 2.8e-07, + "input_cost_per_token": 7e-8, + "output_cost_per_token": 2.8e-7, "litellm_provider": "perplexity", "mode": "chat" }, @@ -14505,8 +15196,8 @@ "max_tokens": 4096, "max_input_tokens": 4096, "max_output_tokens": 4096, - "input_cost_per_token": 7e-07, - "output_cost_per_token": 2.8e-06, + "input_cost_per_token": 7e-7, + "output_cost_per_token": 0.0000028, "litellm_provider": "perplexity", "mode": "chat" }, @@ -14514,8 +15205,8 @@ "max_tokens": 4096, "max_input_tokens": 4096, "max_output_tokens": 4096, - "input_cost_per_token": 0.0, - "output_cost_per_token": 2.8e-07, + "input_cost_per_token": 0, + "output_cost_per_token": 2.8e-7, "input_cost_per_request": 0.005, "litellm_provider": "perplexity", "mode": "chat" @@ -14524,8 +15215,8 @@ "max_tokens": 4096, "max_input_tokens": 4096, "max_output_tokens": 4096, - 
"input_cost_per_token": 0.0, - "output_cost_per_token": 2.8e-06, + "input_cost_per_token": 0, + "output_cost_per_token": 0.0000028, "input_cost_per_request": 0.005, "litellm_provider": "perplexity", "mode": "chat" @@ -14534,8 +15225,8 @@ "max_tokens": 4096, "max_input_tokens": 4096, "max_output_tokens": 4096, - "input_cost_per_token": 7e-07, - "output_cost_per_token": 2.8e-06, + "input_cost_per_token": 7e-7, + "output_cost_per_token": 0.0000028, "litellm_provider": "perplexity", "mode": "chat" }, @@ -14543,8 +15234,8 @@ "max_tokens": 4096, "max_input_tokens": 4096, "max_output_tokens": 4096, - "input_cost_per_token": 7e-08, - "output_cost_per_token": 2.8e-07, + "input_cost_per_token": 7e-8, + "output_cost_per_token": 2.8e-7, "litellm_provider": "perplexity", "mode": "chat" }, @@ -14552,8 +15243,8 @@ "max_tokens": 4096, "max_input_tokens": 4096, "max_output_tokens": 4096, - "input_cost_per_token": 7e-08, - "output_cost_per_token": 2.8e-07, + "input_cost_per_token": 7e-8, + "output_cost_per_token": 2.8e-7, "litellm_provider": "perplexity", "mode": "chat" }, @@ -14561,8 +15252,8 @@ "max_tokens": 16384, "max_input_tokens": 16384, "max_output_tokens": 16384, - "input_cost_per_token": 7e-08, - "output_cost_per_token": 2.8e-07, + "input_cost_per_token": 7e-8, + "output_cost_per_token": 2.8e-7, "litellm_provider": "perplexity", "mode": "chat" }, @@ -14571,7 +15262,7 @@ "max_input_tokens": 12000, "max_output_tokens": 12000, "input_cost_per_token": 0, - "output_cost_per_token": 2.8e-07, + "output_cost_per_token": 2.8e-7, "input_cost_per_request": 0.005, "litellm_provider": "perplexity", "mode": "chat" @@ -14580,8 +15271,8 @@ "max_tokens": 16384, "max_input_tokens": 16384, "max_output_tokens": 16384, - "input_cost_per_token": 6e-07, - "output_cost_per_token": 1.8e-06, + "input_cost_per_token": 6e-7, + "output_cost_per_token": 0.0000018, "litellm_provider": "perplexity", "mode": "chat" }, @@ -14590,7 +15281,7 @@ "max_input_tokens": 12000, "max_output_tokens": 12000, 
"input_cost_per_token": 0, - "output_cost_per_token": 1.8e-06, + "output_cost_per_token": 0.0000018, "input_cost_per_request": 0.005, "litellm_provider": "perplexity", "mode": "chat" @@ -14598,8 +15289,8 @@ "perplexity/sonar": { "max_tokens": 128000, "max_input_tokens": 128000, - "input_cost_per_token": 1e-06, - "output_cost_per_token": 1e-06, + "input_cost_per_token": 0.000001, + "output_cost_per_token": 0.000001, "litellm_provider": "perplexity", "mode": "chat", "search_context_cost_per_query": { @@ -14613,8 +15304,8 @@ "max_tokens": 8000, "max_input_tokens": 200000, "max_output_tokens": 8000, - "input_cost_per_token": 3e-06, - "output_cost_per_token": 1.5e-05, + "input_cost_per_token": 0.000003, + "output_cost_per_token": 0.000015, "litellm_provider": "perplexity", "mode": "chat", "search_context_cost_per_query": { @@ -14627,8 +15318,8 @@ "perplexity/sonar-reasoning": { "max_tokens": 128000, "max_input_tokens": 128000, - "input_cost_per_token": 1e-06, - "output_cost_per_token": 5e-06, + "input_cost_per_token": 0.000001, + "output_cost_per_token": 0.000005, "litellm_provider": "perplexity", "mode": "chat", "search_context_cost_per_query": { @@ -14642,8 +15333,8 @@ "perplexity/sonar-reasoning-pro": { "max_tokens": 128000, "max_input_tokens": 128000, - "input_cost_per_token": 2e-06, - "output_cost_per_token": 8e-06, + "input_cost_per_token": 0.000002, + "output_cost_per_token": 0.000008, "litellm_provider": "perplexity", "mode": "chat", "search_context_cost_per_query": { @@ -14657,10 +15348,10 @@ "perplexity/sonar-deep-research": { "max_tokens": 128000, "max_input_tokens": 128000, - "input_cost_per_token": 2e-06, - "output_cost_per_token": 8e-06, - "output_cost_per_reasoning_token": 3e-06, - "citation_cost_per_token": 2e-06, + "input_cost_per_token": 0.000002, + "output_cost_per_token": 0.000008, + "output_cost_per_reasoning_token": 0.000003, + "citation_cost_per_token": 0.000002, "search_context_cost_per_query": { "search_context_size_low": 0.005, 
"search_context_size_medium": 0.005, @@ -14675,8 +15366,8 @@ "max_tokens": 16384, "max_input_tokens": 16384, "max_output_tokens": 16384, - "input_cost_per_token": 1e-07, - "output_cost_per_token": 1e-07, + "input_cost_per_token": 1e-7, + "output_cost_per_token": 1e-7, "litellm_provider": "fireworks_ai", "mode": "chat", "supports_function_calling": false, @@ -14688,8 +15379,8 @@ "max_tokens": 16384, "max_input_tokens": 16384, "max_output_tokens": 16384, - "input_cost_per_token": 1e-07, - "output_cost_per_token": 1e-07, + "input_cost_per_token": 1e-7, + "output_cost_per_token": 1e-7, "litellm_provider": "fireworks_ai", "mode": "chat", "supports_function_calling": false, @@ -14701,8 +15392,8 @@ "max_tokens": 16384, "max_input_tokens": 16384, "max_output_tokens": 16384, - "input_cost_per_token": 1e-07, - "output_cost_per_token": 1e-07, + "input_cost_per_token": 1e-7, + "output_cost_per_token": 1e-7, "litellm_provider": "fireworks_ai", "mode": "chat", "supports_function_calling": false, @@ -14714,8 +15405,8 @@ "max_tokens": 16384, "max_input_tokens": 16384, "max_output_tokens": 16384, - "input_cost_per_token": 2e-07, - "output_cost_per_token": 2e-07, + "input_cost_per_token": 2e-7, + "output_cost_per_token": 2e-7, "litellm_provider": "fireworks_ai", "mode": "chat", "supports_function_calling": false, @@ -14728,8 +15419,8 @@ "max_tokens": 16384, "max_input_tokens": 16384, "max_output_tokens": 16384, - "input_cost_per_token": 9e-07, - "output_cost_per_token": 9e-07, + "input_cost_per_token": 9e-7, + "output_cost_per_token": 9e-7, "litellm_provider": "fireworks_ai", "mode": "chat", "supports_tool_choice": false, @@ -14741,8 +15432,8 @@ "max_tokens": 8192, "max_input_tokens": 8192, "max_output_tokens": 8192, - "input_cost_per_token": 9e-07, - "output_cost_per_token": 9e-07, + "input_cost_per_token": 9e-7, + "output_cost_per_token": 9e-7, "litellm_provider": "fireworks_ai", "mode": "chat", "supports_function_calling": true, @@ -14754,8 +15445,8 @@ "max_tokens": 65536, 
"max_input_tokens": 65536, "max_output_tokens": 65536, - "input_cost_per_token": 1.2e-06, - "output_cost_per_token": 1.2e-06, + "input_cost_per_token": 0.0000012, + "output_cost_per_token": 0.0000012, "litellm_provider": "fireworks_ai", "mode": "chat", "supports_function_calling": true, @@ -14767,8 +15458,8 @@ "max_tokens": 32768, "max_input_tokens": 32768, "max_output_tokens": 32768, - "input_cost_per_token": 9e-07, - "output_cost_per_token": 9e-07, + "input_cost_per_token": 9e-7, + "output_cost_per_token": 9e-7, "litellm_provider": "fireworks_ai", "mode": "chat", "supports_function_calling": false, @@ -14780,8 +15471,8 @@ "max_tokens": 4096, "max_input_tokens": 4096, "max_output_tokens": 4096, - "input_cost_per_token": 9e-07, - "output_cost_per_token": 9e-07, + "input_cost_per_token": 9e-7, + "output_cost_per_token": 9e-7, "litellm_provider": "fireworks_ai", "mode": "chat", "supports_function_calling": false, @@ -14793,8 +15484,8 @@ "max_tokens": 32768, "max_input_tokens": 32768, "max_output_tokens": 32768, - "input_cost_per_token": 3e-06, - "output_cost_per_token": 3e-06, + "input_cost_per_token": 0.000003, + "output_cost_per_token": 0.000003, "litellm_provider": "fireworks_ai", "mode": "chat", "supports_function_calling": false, @@ -14806,8 +15497,8 @@ "max_tokens": 65536, "max_input_tokens": 65536, "max_output_tokens": 65536, - "input_cost_per_token": 1.2e-06, - "output_cost_per_token": 1.2e-06, + "input_cost_per_token": 0.0000012, + "output_cost_per_token": 0.0000012, "litellm_provider": "fireworks_ai", "mode": "chat", "supports_function_calling": false, @@ -14819,8 +15510,8 @@ "max_tokens": 8192, "max_input_tokens": 128000, "max_output_tokens": 8192, - "input_cost_per_token": 9e-07, - "output_cost_per_token": 9e-07, + "input_cost_per_token": 9e-7, + "output_cost_per_token": 9e-7, "litellm_provider": "fireworks_ai", "mode": "chat", "supports_response_schema": true, @@ -14831,8 +15522,8 @@ "max_tokens": 20480, "max_input_tokens": 128000, "max_output_tokens": 
20480, - "input_cost_per_token": 3e-06, - "output_cost_per_token": 8e-06, + "input_cost_per_token": 0.000003, + "output_cost_per_token": 0.000008, "litellm_provider": "fireworks_ai", "mode": "chat", "supports_response_schema": true, @@ -14843,8 +15534,8 @@ "max_tokens": 20480, "max_input_tokens": 128000, "max_output_tokens": 20480, - "input_cost_per_token": 5.5e-07, - "output_cost_per_token": 2.19e-06, + "input_cost_per_token": 5.5e-7, + "output_cost_per_token": 0.00000219, "litellm_provider": "fireworks_ai", "mode": "chat", "supports_response_schema": true, @@ -14855,8 +15546,8 @@ "max_tokens": 160000, "max_input_tokens": 160000, "max_output_tokens": 160000, - "input_cost_per_token": 3e-06, - "output_cost_per_token": 8e-06, + "input_cost_per_token": 0.000003, + "output_cost_per_token": 0.000008, "litellm_provider": "fireworks_ai", "mode": "chat", "source": "https://fireworks.ai/pricing", @@ -14867,8 +15558,8 @@ "max_tokens": 131072, "max_input_tokens": 131072, "max_output_tokens": 16384, - "input_cost_per_token": 6e-07, - "output_cost_per_token": 2.5e-06, + "input_cost_per_token": 6e-7, + "output_cost_per_token": 0.0000025, "litellm_provider": "fireworks_ai", "mode": "chat", "supports_function_calling": true, @@ -14880,8 +15571,8 @@ "max_tokens": 16384, "max_input_tokens": 128000, "max_output_tokens": 16384, - "input_cost_per_token": 3e-06, - "output_cost_per_token": 3e-06, + "input_cost_per_token": 0.000003, + "output_cost_per_token": 0.000003, "litellm_provider": "fireworks_ai", "mode": "chat", "supports_response_schema": true, @@ -14893,8 +15584,8 @@ "max_tokens": 131072, "max_input_tokens": 131072, "max_output_tokens": 131072, - "input_cost_per_token": 2.2e-07, - "output_cost_per_token": 8.8e-07, + "input_cost_per_token": 2.2e-7, + "output_cost_per_token": 8.8e-7, "litellm_provider": "fireworks_ai", "mode": "chat", "supports_response_schema": true, @@ -14905,8 +15596,8 @@ "max_tokens": 131072, "max_input_tokens": 131072, "max_output_tokens": 131072, - 
"input_cost_per_token": 1.5e-07, - "output_cost_per_token": 6e-07, + "input_cost_per_token": 1.5e-7, + "output_cost_per_token": 6e-7, "litellm_provider": "fireworks_ai", "mode": "chat", "supports_response_schema": true, @@ -14914,11 +15605,11 @@ "supports_tool_choice": false }, "fireworks_ai/accounts/fireworks/models/glm-4p5": { - "max_tokens": 128000, + "max_tokens": 96000, "max_input_tokens": 128000, "max_output_tokens": 96000, - "input_cost_per_token": 5.5e-07, - "output_cost_per_token": 2.19e-06, + "input_cost_per_token": 5.5e-7, + "output_cost_per_token": 0.00000219, "litellm_provider": "fireworks_ai", "mode": "chat", "supports_function_calling": true, @@ -14927,11 +15618,11 @@ "source": "https://fireworks.ai/models/fireworks/glm-4p5" }, "fireworks_ai/accounts/fireworks/models/glm-4p5-air": { - "max_tokens": 128000, + "max_tokens": 96000, "max_input_tokens": 128000, "max_output_tokens": 96000, - "input_cost_per_token": 2.2e-07, - "output_cost_per_token": 8.8e-07, + "input_cost_per_token": 2.2e-7, + "output_cost_per_token": 8.8e-7, "litellm_provider": "fireworks_ai", "mode": "chat", "supports_function_calling": true, @@ -14943,8 +15634,8 @@ "max_tokens": 131072, "max_input_tokens": 131072, "max_output_tokens": 131072, - "input_cost_per_token": 1.5e-07, - "output_cost_per_token": 6e-07, + "input_cost_per_token": 1.5e-7, + "output_cost_per_token": 6e-7, "litellm_provider": "fireworks_ai", "mode": "chat", "supports_function_calling": true, @@ -14956,8 +15647,8 @@ "max_tokens": 131072, "max_input_tokens": 131072, "max_output_tokens": 131072, - "input_cost_per_token": 5e-08, - "output_cost_per_token": 2e-07, + "input_cost_per_token": 5e-8, + "output_cost_per_token": 2e-7, "litellm_provider": "fireworks_ai", "mode": "chat", "supports_function_calling": true, @@ -14968,8 +15659,8 @@ "fireworks_ai/nomic-ai/nomic-embed-text-v1.5": { "max_tokens": 8192, "max_input_tokens": 8192, - "input_cost_per_token": 8e-09, - "output_cost_per_token": 0.0, + "input_cost_per_token": 
8e-9, + "output_cost_per_token": 0, "litellm_provider": "fireworks_ai-embedding-models", "mode": "embedding", "source": "https://fireworks.ai/pricing" @@ -14977,8 +15668,8 @@ "fireworks_ai/nomic-ai/nomic-embed-text-v1": { "max_tokens": 8192, "max_input_tokens": 8192, - "input_cost_per_token": 8e-09, - "output_cost_per_token": 0.0, + "input_cost_per_token": 8e-9, + "output_cost_per_token": 0, "litellm_provider": "fireworks_ai-embedding-models", "mode": "embedding", "source": "https://fireworks.ai/pricing" @@ -14986,8 +15677,8 @@ "fireworks_ai/WhereIsAI/UAE-Large-V1": { "max_tokens": 512, "max_input_tokens": 512, - "input_cost_per_token": 1.6e-08, - "output_cost_per_token": 0.0, + "input_cost_per_token": 1.6e-8, + "output_cost_per_token": 0, "litellm_provider": "fireworks_ai-embedding-models", "mode": "embedding", "source": "https://fireworks.ai/pricing" @@ -14995,8 +15686,8 @@ "fireworks_ai/thenlper/gte-large": { "max_tokens": 512, "max_input_tokens": 512, - "input_cost_per_token": 1.6e-08, - "output_cost_per_token": 0.0, + "input_cost_per_token": 1.6e-8, + "output_cost_per_token": 0, "litellm_provider": "fireworks_ai-embedding-models", "mode": "embedding", "source": "https://fireworks.ai/pricing" @@ -15004,58 +15695,58 @@ "fireworks_ai/thenlper/gte-base": { "max_tokens": 512, "max_input_tokens": 512, - "input_cost_per_token": 8e-09, - "output_cost_per_token": 0.0, + "input_cost_per_token": 8e-9, + "output_cost_per_token": 0, "litellm_provider": "fireworks_ai-embedding-models", "mode": "embedding", "source": "https://fireworks.ai/pricing" }, "fireworks-ai-up-to-4b": { - "input_cost_per_token": 2e-07, - "output_cost_per_token": 2e-07, + "input_cost_per_token": 2e-7, + "output_cost_per_token": 2e-7, "litellm_provider": "fireworks_ai" }, "fireworks-ai-4.1b-to-16b": { - "input_cost_per_token": 2e-07, - "output_cost_per_token": 2e-07, + "input_cost_per_token": 2e-7, + "output_cost_per_token": 2e-7, "litellm_provider": "fireworks_ai" }, "fireworks-ai-above-16b": { - 
"input_cost_per_token": 9e-07, - "output_cost_per_token": 9e-07, + "input_cost_per_token": 9e-7, + "output_cost_per_token": 9e-7, "litellm_provider": "fireworks_ai" }, "fireworks-ai-moe-up-to-56b": { - "input_cost_per_token": 5e-07, - "output_cost_per_token": 5e-07, + "input_cost_per_token": 5e-7, + "output_cost_per_token": 5e-7, "litellm_provider": "fireworks_ai" }, "fireworks-ai-56b-to-176b": { - "input_cost_per_token": 1.2e-06, - "output_cost_per_token": 1.2e-06, + "input_cost_per_token": 0.0000012, + "output_cost_per_token": 0.0000012, "litellm_provider": "fireworks_ai" }, "fireworks-ai-default": { - "input_cost_per_token": 0.0, - "output_cost_per_token": 0.0, + "input_cost_per_token": 0, + "output_cost_per_token": 0, "litellm_provider": "fireworks_ai" }, "fireworks-ai-embedding-up-to-150m": { - "input_cost_per_token": 8e-09, - "output_cost_per_token": 0.0, + "input_cost_per_token": 8e-9, + "output_cost_per_token": 0, "litellm_provider": "fireworks_ai-embedding-models" }, "fireworks-ai-embedding-150m-to-350m": { - "input_cost_per_token": 1.6e-08, - "output_cost_per_token": 0.0, + "input_cost_per_token": 1.6e-8, + "output_cost_per_token": 0, "litellm_provider": "fireworks_ai-embedding-models" }, "anyscale/mistralai/Mistral-7B-Instruct-v0.1": { "max_tokens": 16384, "max_input_tokens": 16384, "max_output_tokens": 16384, - "input_cost_per_token": 1.5e-07, - "output_cost_per_token": 1.5e-07, + "input_cost_per_token": 1.5e-7, + "output_cost_per_token": 1.5e-7, "litellm_provider": "anyscale", "mode": "chat", "supports_function_calling": true, @@ -15065,8 +15756,8 @@ "max_tokens": 16384, "max_input_tokens": 16384, "max_output_tokens": 16384, - "input_cost_per_token": 1.5e-07, - "output_cost_per_token": 1.5e-07, + "input_cost_per_token": 1.5e-7, + "output_cost_per_token": 1.5e-7, "litellm_provider": "anyscale", "mode": "chat", "supports_function_calling": true, @@ -15076,8 +15767,8 @@ "max_tokens": 65536, "max_input_tokens": 65536, "max_output_tokens": 65536, - 
"input_cost_per_token": 9e-07, - "output_cost_per_token": 9e-07, + "input_cost_per_token": 9e-7, + "output_cost_per_token": 9e-7, "litellm_provider": "anyscale", "mode": "chat", "supports_function_calling": true, @@ -15087,8 +15778,8 @@ "max_tokens": 16384, "max_input_tokens": 16384, "max_output_tokens": 16384, - "input_cost_per_token": 1.5e-07, - "output_cost_per_token": 1.5e-07, + "input_cost_per_token": 1.5e-7, + "output_cost_per_token": 1.5e-7, "litellm_provider": "anyscale", "mode": "chat" }, @@ -15096,8 +15787,8 @@ "max_tokens": 8192, "max_input_tokens": 8192, "max_output_tokens": 8192, - "input_cost_per_token": 1.5e-07, - "output_cost_per_token": 1.5e-07, + "input_cost_per_token": 1.5e-7, + "output_cost_per_token": 1.5e-7, "litellm_provider": "anyscale", "mode": "chat", "source": "https://docs.anyscale.com/preview/endpoints/text-generation/supported-models/google-gemma-7b-it" @@ -15106,8 +15797,8 @@ "max_tokens": 4096, "max_input_tokens": 4096, "max_output_tokens": 4096, - "input_cost_per_token": 1.5e-07, - "output_cost_per_token": 1.5e-07, + "input_cost_per_token": 1.5e-7, + "output_cost_per_token": 1.5e-7, "litellm_provider": "anyscale", "mode": "chat" }, @@ -15115,8 +15806,8 @@ "max_tokens": 4096, "max_input_tokens": 4096, "max_output_tokens": 4096, - "input_cost_per_token": 2.5e-07, - "output_cost_per_token": 2.5e-07, + "input_cost_per_token": 2.5e-7, + "output_cost_per_token": 2.5e-7, "litellm_provider": "anyscale", "mode": "chat" }, @@ -15124,8 +15815,8 @@ "max_tokens": 4096, "max_input_tokens": 4096, "max_output_tokens": 4096, - "input_cost_per_token": 1e-06, - "output_cost_per_token": 1e-06, + "input_cost_per_token": 0.000001, + "output_cost_per_token": 0.000001, "litellm_provider": "anyscale", "mode": "chat" }, @@ -15133,8 +15824,8 @@ "max_tokens": 4096, "max_input_tokens": 4096, "max_output_tokens": 4096, - "input_cost_per_token": 1e-06, - "output_cost_per_token": 1e-06, + "input_cost_per_token": 0.000001, + "output_cost_per_token": 0.000001, 
"litellm_provider": "anyscale", "mode": "chat" }, @@ -15142,8 +15833,8 @@ "max_tokens": 4096, "max_input_tokens": 4096, "max_output_tokens": 4096, - "input_cost_per_token": 1e-06, - "output_cost_per_token": 1e-06, + "input_cost_per_token": 0.000001, + "output_cost_per_token": 0.000001, "litellm_provider": "anyscale", "mode": "chat", "source": "https://docs.anyscale.com/preview/endpoints/text-generation/supported-models/codellama-CodeLlama-70b-Instruct-hf" @@ -15152,8 +15843,8 @@ "max_tokens": 8192, "max_input_tokens": 8192, "max_output_tokens": 8192, - "input_cost_per_token": 1.5e-07, - "output_cost_per_token": 1.5e-07, + "input_cost_per_token": 1.5e-7, + "output_cost_per_token": 1.5e-7, "litellm_provider": "anyscale", "mode": "chat", "source": "https://docs.anyscale.com/preview/endpoints/text-generation/supported-models/meta-llama-Meta-Llama-3-8B-Instruct" @@ -15162,8 +15853,8 @@ "max_tokens": 8192, "max_input_tokens": 8192, "max_output_tokens": 8192, - "input_cost_per_token": 1e-06, - "output_cost_per_token": 1e-06, + "input_cost_per_token": 0.000001, + "output_cost_per_token": 0.000001, "litellm_provider": "anyscale", "mode": "chat", "source": "https://docs.anyscale.com/preview/endpoints/text-generation/supported-models/meta-llama-Meta-Llama-3-70B-Instruct" @@ -15172,8 +15863,8 @@ "max_tokens": 3072, "max_input_tokens": 3072, "max_output_tokens": 3072, - "input_cost_per_token": 1.923e-06, - "output_cost_per_token": 1.923e-06, + "input_cost_per_token": 0.000001923, + "output_cost_per_token": 0.000001923, "litellm_provider": "cloudflare", "mode": "chat" }, @@ -15181,8 +15872,8 @@ "max_tokens": 2048, "max_input_tokens": 2048, "max_output_tokens": 2048, - "input_cost_per_token": 1.923e-06, - "output_cost_per_token": 1.923e-06, + "input_cost_per_token": 0.000001923, + "output_cost_per_token": 0.000001923, "litellm_provider": "cloudflare", "mode": "chat" }, @@ -15190,8 +15881,8 @@ "max_tokens": 8192, "max_input_tokens": 8192, "max_output_tokens": 8192, - 
"input_cost_per_token": 1.923e-06, - "output_cost_per_token": 1.923e-06, + "input_cost_per_token": 0.000001923, + "output_cost_per_token": 0.000001923, "litellm_provider": "cloudflare", "mode": "chat" }, @@ -15199,8 +15890,8 @@ "max_tokens": 4096, "max_input_tokens": 4096, "max_output_tokens": 4096, - "input_cost_per_token": 1.923e-06, - "output_cost_per_token": 1.923e-06, + "input_cost_per_token": 0.000001923, + "output_cost_per_token": 0.000001923, "litellm_provider": "cloudflare", "mode": "chat" }, @@ -15208,8 +15899,8 @@ "max_tokens": 128000, "max_input_tokens": 128000, "max_output_tokens": 128000, - "input_cost_per_token": 3e-06, - "output_cost_per_token": 1.5e-05, + "input_cost_per_token": 0.000003, + "output_cost_per_token": 0.000015, "litellm_provider": "v0", "mode": "chat", "supports_function_calling": true, @@ -15222,8 +15913,8 @@ "max_tokens": 128000, "max_input_tokens": 128000, "max_output_tokens": 128000, - "input_cost_per_token": 3e-06, - "output_cost_per_token": 1.5e-05, + "input_cost_per_token": 0.000003, + "output_cost_per_token": 0.000015, "litellm_provider": "v0", "mode": "chat", "supports_function_calling": true, @@ -15236,8 +15927,8 @@ "max_tokens": 512000, "max_input_tokens": 512000, "max_output_tokens": 512000, - "input_cost_per_token": 1.5e-05, - "output_cost_per_token": 7.5e-05, + "input_cost_per_token": 0.000015, + "output_cost_per_token": 0.000075, "litellm_provider": "v0", "mode": "chat", "supports_function_calling": true, @@ -15250,8 +15941,8 @@ "max_tokens": 131072, "max_input_tokens": 131072, "max_output_tokens": 131072, - "input_cost_per_token": 2e-07, - "output_cost_per_token": 6e-07, + "input_cost_per_token": 2e-7, + "output_cost_per_token": 6e-7, "litellm_provider": "lambda_ai", "mode": "chat", "supports_function_calling": true, @@ -15264,8 +15955,8 @@ "max_tokens": 131072, "max_input_tokens": 131072, "max_output_tokens": 131072, - "input_cost_per_token": 2e-07, - "output_cost_per_token": 6e-07, + "input_cost_per_token": 2e-7, + 
"output_cost_per_token": 6e-7, "litellm_provider": "lambda_ai", "mode": "chat", "supports_function_calling": true, @@ -15278,8 +15969,8 @@ "max_tokens": 131072, "max_input_tokens": 131072, "max_output_tokens": 131072, - "input_cost_per_token": 8e-07, - "output_cost_per_token": 8e-07, + "input_cost_per_token": 8e-7, + "output_cost_per_token": 8e-7, "litellm_provider": "lambda_ai", "mode": "chat", "supports_function_calling": true, @@ -15292,8 +15983,8 @@ "max_tokens": 131072, "max_input_tokens": 131072, "max_output_tokens": 131072, - "input_cost_per_token": 2e-07, - "output_cost_per_token": 6e-07, + "input_cost_per_token": 2e-7, + "output_cost_per_token": 6e-7, "litellm_provider": "lambda_ai", "mode": "chat", "supports_function_calling": true, @@ -15305,8 +15996,8 @@ "max_tokens": 131072, "max_input_tokens": 131072, "max_output_tokens": 131072, - "input_cost_per_token": 8e-07, - "output_cost_per_token": 8e-07, + "input_cost_per_token": 8e-7, + "output_cost_per_token": 8e-7, "litellm_provider": "lambda_ai", "mode": "chat", "supports_function_calling": true, @@ -15318,8 +16009,8 @@ "max_tokens": 131072, "max_input_tokens": 131072, "max_output_tokens": 131072, - "input_cost_per_token": 1.2e-07, - "output_cost_per_token": 3e-07, + "input_cost_per_token": 1.2e-7, + "output_cost_per_token": 3e-7, "litellm_provider": "lambda_ai", "mode": "chat", "supports_function_calling": true, @@ -15331,8 +16022,8 @@ "max_tokens": 131072, "max_input_tokens": 131072, "max_output_tokens": 131072, - "input_cost_per_token": 2.5e-08, - "output_cost_per_token": 4e-08, + "input_cost_per_token": 2.5e-8, + "output_cost_per_token": 4e-8, "litellm_provider": "lambda_ai", "mode": "chat", "supports_function_calling": true, @@ -15344,8 +16035,8 @@ "max_tokens": 131072, "max_input_tokens": 131072, "max_output_tokens": 131072, - "input_cost_per_token": 1e-07, - "output_cost_per_token": 2e-07, + "input_cost_per_token": 1e-7, + "output_cost_per_token": 2e-7, "litellm_provider": "lambda_ai", "mode": 
"chat", "supports_function_calling": true, @@ -15357,8 +16048,8 @@ "max_tokens": 131072, "max_input_tokens": 131072, "max_output_tokens": 131072, - "input_cost_per_token": 2.5e-08, - "output_cost_per_token": 4e-08, + "input_cost_per_token": 2.5e-8, + "output_cost_per_token": 4e-8, "litellm_provider": "lambda_ai", "mode": "chat", "supports_function_calling": true, @@ -15370,8 +16061,8 @@ "max_tokens": 131072, "max_input_tokens": 131072, "max_output_tokens": 8192, - "input_cost_per_token": 5e-08, - "output_cost_per_token": 1e-07, + "input_cost_per_token": 5e-8, + "output_cost_per_token": 1e-7, "litellm_provider": "lambda_ai", "mode": "chat", "supports_function_calling": true, @@ -15383,8 +16074,8 @@ "max_tokens": 16384, "max_input_tokens": 16384, "max_output_tokens": 8192, - "input_cost_per_token": 5e-08, - "output_cost_per_token": 1e-07, + "input_cost_per_token": 5e-8, + "output_cost_per_token": 1e-7, "litellm_provider": "lambda_ai", "mode": "chat", "supports_function_calling": true, @@ -15396,8 +16087,8 @@ "max_tokens": 131072, "max_input_tokens": 131072, "max_output_tokens": 131072, - "input_cost_per_token": 8e-07, - "output_cost_per_token": 8e-07, + "input_cost_per_token": 8e-7, + "output_cost_per_token": 8e-7, "litellm_provider": "lambda_ai", "mode": "chat", "supports_function_calling": true, @@ -15409,8 +16100,8 @@ "max_tokens": 131072, "max_input_tokens": 131072, "max_output_tokens": 131072, - "input_cost_per_token": 1.2e-07, - "output_cost_per_token": 3e-07, + "input_cost_per_token": 1.2e-7, + "output_cost_per_token": 3e-7, "litellm_provider": "lambda_ai", "mode": "chat", "supports_function_calling": true, @@ -15422,8 +16113,8 @@ "max_tokens": 131072, "max_input_tokens": 131072, "max_output_tokens": 131072, - "input_cost_per_token": 2.5e-08, - "output_cost_per_token": 4e-08, + "input_cost_per_token": 2.5e-8, + "output_cost_per_token": 4e-8, "litellm_provider": "lambda_ai", "mode": "chat", "supports_function_calling": true, @@ -15435,8 +16126,8 @@ 
"max_tokens": 131072, "max_input_tokens": 131072, "max_output_tokens": 131072, - "input_cost_per_token": 1.2e-07, - "output_cost_per_token": 3e-07, + "input_cost_per_token": 1.2e-7, + "output_cost_per_token": 3e-7, "litellm_provider": "lambda_ai", "mode": "chat", "supports_function_calling": true, @@ -15448,8 +16139,8 @@ "max_tokens": 131072, "max_input_tokens": 131072, "max_output_tokens": 131072, - "input_cost_per_token": 1.5e-08, - "output_cost_per_token": 2.5e-08, + "input_cost_per_token": 1.5e-8, + "output_cost_per_token": 2.5e-8, "litellm_provider": "lambda_ai", "mode": "chat", "supports_function_calling": true, @@ -15462,8 +16153,8 @@ "max_tokens": 131072, "max_input_tokens": 131072, "max_output_tokens": 131072, - "input_cost_per_token": 1.5e-08, - "output_cost_per_token": 2.5e-08, + "input_cost_per_token": 1.5e-8, + "output_cost_per_token": 2.5e-8, "litellm_provider": "lambda_ai", "mode": "chat", "supports_function_calling": true, @@ -15475,8 +16166,8 @@ "max_tokens": 131072, "max_input_tokens": 131072, "max_output_tokens": 131072, - "input_cost_per_token": 1.2e-07, - "output_cost_per_token": 3e-07, + "input_cost_per_token": 1.2e-7, + "output_cost_per_token": 3e-7, "litellm_provider": "lambda_ai", "mode": "chat", "supports_function_calling": true, @@ -15488,8 +16179,8 @@ "max_tokens": 131072, "max_input_tokens": 131072, "max_output_tokens": 131072, - "input_cost_per_token": 5e-08, - "output_cost_per_token": 1e-07, + "input_cost_per_token": 5e-8, + "output_cost_per_token": 1e-7, "litellm_provider": "lambda_ai", "mode": "chat", "supports_function_calling": true, @@ -15501,8 +16192,8 @@ "max_tokens": 131072, "max_input_tokens": 131072, "max_output_tokens": 131072, - "input_cost_per_token": 5e-08, - "output_cost_per_token": 1e-07, + "input_cost_per_token": 5e-8, + "output_cost_per_token": 1e-7, "litellm_provider": "lambda_ai", "mode": "chat", "supports_function_calling": true, @@ -15515,8 +16206,8 @@ "max_tokens": 131072, "max_input_tokens": 131072, 
"max_output_tokens": 131072, - "input_cost_per_token": 2e-06, - "output_cost_per_token": 2e-06, + "input_cost_per_token": 0.000002, + "output_cost_per_token": 0.000002, "litellm_provider": "hyperbolic", "mode": "chat", "supports_function_calling": true, @@ -15528,8 +16219,8 @@ "max_tokens": 131072, "max_input_tokens": 131072, "max_output_tokens": 131072, - "input_cost_per_token": 2.5e-07, - "output_cost_per_token": 2.5e-07, + "input_cost_per_token": 2.5e-7, + "output_cost_per_token": 2.5e-7, "litellm_provider": "hyperbolic", "mode": "chat", "supports_function_calling": true, @@ -15541,8 +16232,8 @@ "max_tokens": 131072, "max_input_tokens": 131072, "max_output_tokens": 131072, - "input_cost_per_token": 2e-06, - "output_cost_per_token": 2e-06, + "input_cost_per_token": 0.000002, + "output_cost_per_token": 0.000002, "litellm_provider": "hyperbolic", "mode": "chat", "supports_function_calling": true, @@ -15554,8 +16245,8 @@ "max_tokens": 32768, "max_input_tokens": 32768, "max_output_tokens": 32768, - "input_cost_per_token": 4e-07, - "output_cost_per_token": 4e-07, + "input_cost_per_token": 4e-7, + "output_cost_per_token": 4e-7, "litellm_provider": "hyperbolic", "mode": "chat", "supports_function_calling": true, @@ -15567,8 +16258,8 @@ "max_tokens": 131072, "max_input_tokens": 131072, "max_output_tokens": 131072, - "input_cost_per_token": 2e-07, - "output_cost_per_token": 2e-07, + "input_cost_per_token": 2e-7, + "output_cost_per_token": 2e-7, "litellm_provider": "hyperbolic", "mode": "chat", "supports_function_calling": true, @@ -15580,8 +16271,8 @@ "max_tokens": 32768, "max_input_tokens": 32768, "max_output_tokens": 32768, - "input_cost_per_token": 4e-07, - "output_cost_per_token": 4e-07, + "input_cost_per_token": 4e-7, + "output_cost_per_token": 4e-7, "litellm_provider": "hyperbolic", "mode": "chat", "supports_function_calling": true, @@ -15593,8 +16284,8 @@ "max_tokens": 32768, "max_input_tokens": 32768, "max_output_tokens": 32768, - "input_cost_per_token": 2e-07, - 
"output_cost_per_token": 2e-07, + "input_cost_per_token": 2e-7, + "output_cost_per_token": 2e-7, "litellm_provider": "hyperbolic", "mode": "chat", "supports_function_calling": true, @@ -15606,8 +16297,8 @@ "max_tokens": 131072, "max_input_tokens": 131072, "max_output_tokens": 131072, - "input_cost_per_token": 1.2e-07, - "output_cost_per_token": 3e-07, + "input_cost_per_token": 1.2e-7, + "output_cost_per_token": 3e-7, "litellm_provider": "hyperbolic", "mode": "chat", "supports_function_calling": true, @@ -15619,8 +16310,8 @@ "max_tokens": 32768, "max_input_tokens": 32768, "max_output_tokens": 32768, - "input_cost_per_token": 1.2e-07, - "output_cost_per_token": 3e-07, + "input_cost_per_token": 1.2e-7, + "output_cost_per_token": 3e-7, "litellm_provider": "hyperbolic", "mode": "chat", "supports_function_calling": true, @@ -15632,8 +16323,8 @@ "max_tokens": 32768, "max_input_tokens": 32768, "max_output_tokens": 32768, - "input_cost_per_token": 1.2e-07, - "output_cost_per_token": 3e-07, + "input_cost_per_token": 1.2e-7, + "output_cost_per_token": 3e-7, "litellm_provider": "hyperbolic", "mode": "chat", "supports_function_calling": true, @@ -15645,8 +16336,8 @@ "max_tokens": 131072, "max_input_tokens": 131072, "max_output_tokens": 131072, - "input_cost_per_token": 1.2e-07, - "output_cost_per_token": 3e-07, + "input_cost_per_token": 1.2e-7, + "output_cost_per_token": 3e-7, "litellm_provider": "hyperbolic", "mode": "chat", "supports_function_calling": true, @@ -15658,8 +16349,8 @@ "max_tokens": 131072, "max_input_tokens": 131072, "max_output_tokens": 131072, - "input_cost_per_token": 1.2e-07, - "output_cost_per_token": 3e-07, + "input_cost_per_token": 1.2e-7, + "output_cost_per_token": 3e-7, "litellm_provider": "hyperbolic", "mode": "chat", "supports_function_calling": true, @@ -15671,8 +16362,8 @@ "max_tokens": 32768, "max_input_tokens": 32768, "max_output_tokens": 32768, - "input_cost_per_token": 1.2e-07, - "output_cost_per_token": 3e-07, + "input_cost_per_token": 1.2e-7, 
+ "output_cost_per_token": 3e-7, "litellm_provider": "hyperbolic", "mode": "chat", "supports_function_calling": true, @@ -15684,8 +16375,8 @@ "max_tokens": 32768, "max_input_tokens": 32768, "max_output_tokens": 32768, - "input_cost_per_token": 1.2e-07, - "output_cost_per_token": 3e-07, + "input_cost_per_token": 1.2e-7, + "output_cost_per_token": 3e-7, "litellm_provider": "hyperbolic", "mode": "chat", "supports_function_calling": true, @@ -15697,8 +16388,8 @@ "max_tokens": 32768, "max_input_tokens": 32768, "max_output_tokens": 32768, - "input_cost_per_token": 1.2e-07, - "output_cost_per_token": 3e-07, + "input_cost_per_token": 1.2e-7, + "output_cost_per_token": 3e-7, "litellm_provider": "hyperbolic", "mode": "chat", "supports_function_calling": true, @@ -15710,8 +16401,8 @@ "max_tokens": 32768, "max_input_tokens": 32768, "max_output_tokens": 32768, - "input_cost_per_token": 1.2e-07, - "output_cost_per_token": 3e-07, + "input_cost_per_token": 1.2e-7, + "output_cost_per_token": 3e-7, "litellm_provider": "hyperbolic", "mode": "chat", "supports_function_calling": true, @@ -15722,96 +16413,96 @@ "voyage/voyage-lite-01": { "max_tokens": 4096, "max_input_tokens": 4096, - "input_cost_per_token": 1e-07, - "output_cost_per_token": 0.0, + "input_cost_per_token": 1e-7, + "output_cost_per_token": 0, "litellm_provider": "voyage", "mode": "embedding" }, "voyage/voyage-large-2": { "max_tokens": 16000, "max_input_tokens": 16000, - "input_cost_per_token": 1.2e-07, - "output_cost_per_token": 0.0, + "input_cost_per_token": 1.2e-7, + "output_cost_per_token": 0, "litellm_provider": "voyage", "mode": "embedding" }, "voyage/voyage-finance-2": { "max_tokens": 32000, "max_input_tokens": 32000, - "input_cost_per_token": 1.2e-07, - "output_cost_per_token": 0.0, + "input_cost_per_token": 1.2e-7, + "output_cost_per_token": 0, "litellm_provider": "voyage", "mode": "embedding" }, "voyage/voyage-lite-02-instruct": { "max_tokens": 4000, "max_input_tokens": 4000, - "input_cost_per_token": 1e-07, - 
"output_cost_per_token": 0.0, + "input_cost_per_token": 1e-7, + "output_cost_per_token": 0, "litellm_provider": "voyage", "mode": "embedding" }, "voyage/voyage-law-2": { "max_tokens": 16000, "max_input_tokens": 16000, - "input_cost_per_token": 1.2e-07, - "output_cost_per_token": 0.0, + "input_cost_per_token": 1.2e-7, + "output_cost_per_token": 0, "litellm_provider": "voyage", "mode": "embedding" }, "voyage/voyage-code-2": { "max_tokens": 16000, "max_input_tokens": 16000, - "input_cost_per_token": 1.2e-07, - "output_cost_per_token": 0.0, + "input_cost_per_token": 1.2e-7, + "output_cost_per_token": 0, "litellm_provider": "voyage", "mode": "embedding" }, "voyage/voyage-2": { "max_tokens": 4000, "max_input_tokens": 4000, - "input_cost_per_token": 1e-07, - "output_cost_per_token": 0.0, + "input_cost_per_token": 1e-7, + "output_cost_per_token": 0, "litellm_provider": "voyage", "mode": "embedding" }, "voyage/voyage-3-large": { "max_tokens": 32000, "max_input_tokens": 32000, - "input_cost_per_token": 1.8e-07, - "output_cost_per_token": 0.0, + "input_cost_per_token": 1.8e-7, + "output_cost_per_token": 0, "litellm_provider": "voyage", "mode": "embedding" }, "voyage/voyage-3": { "max_tokens": 32000, "max_input_tokens": 32000, - "input_cost_per_token": 6e-08, - "output_cost_per_token": 0.0, + "input_cost_per_token": 6e-8, + "output_cost_per_token": 0, "litellm_provider": "voyage", "mode": "embedding" }, "voyage/voyage-3-lite": { "max_tokens": 32000, "max_input_tokens": 32000, - "input_cost_per_token": 2e-08, - "output_cost_per_token": 0.0, + "input_cost_per_token": 2e-8, + "output_cost_per_token": 0, "litellm_provider": "voyage", "mode": "embedding" }, "voyage/voyage-code-3": { "max_tokens": 32000, "max_input_tokens": 32000, - "input_cost_per_token": 1.8e-07, - "output_cost_per_token": 0.0, + "input_cost_per_token": 1.8e-7, + "output_cost_per_token": 0, "litellm_provider": "voyage", "mode": "embedding" }, "voyage/voyage-multimodal-3": { "max_tokens": 32000, "max_input_tokens": 
32000, - "input_cost_per_token": 1.2e-07, - "output_cost_per_token": 0.0, + "input_cost_per_token": 1.2e-7, + "output_cost_per_token": 0, "litellm_provider": "voyage", "mode": "embedding" }, @@ -15820,9 +16511,9 @@ "max_input_tokens": 16000, "max_output_tokens": 16000, "max_query_tokens": 16000, - "input_cost_per_token": 5e-08, - "input_cost_per_query": 5e-08, - "output_cost_per_token": 0.0, + "input_cost_per_token": 5e-8, + "input_cost_per_query": 5e-8, + "output_cost_per_token": 0, "litellm_provider": "voyage", "mode": "rerank" }, @@ -15831,9 +16522,9 @@ "max_input_tokens": 8000, "max_output_tokens": 8000, "max_query_tokens": 8000, - "input_cost_per_token": 2e-08, - "input_cost_per_query": 2e-08, - "output_cost_per_token": 0.0, + "input_cost_per_token": 2e-8, + "input_cost_per_query": 2e-8, + "output_cost_per_token": 0, "litellm_provider": "voyage", "mode": "rerank" }, @@ -15841,9 +16532,9 @@ "max_tokens": 200000, "max_input_tokens": 200000, "max_output_tokens": 128000, - "input_cost_per_token": 2.5e-06, - "input_dbu_cost_per_token": 3.571e-05, - "output_cost_per_token": 1.7857e-05, + "input_cost_per_token": 0.0000025, + "input_dbu_cost_per_token": 0.00003571, + "output_cost_per_token": 0.000017857, "output_db_cost_per_token": 0.000214286, "litellm_provider": "databricks", "mode": "chat", @@ -15860,9 +16551,9 @@ "max_tokens": 128000, "max_input_tokens": 128000, "max_output_tokens": 128000, - "input_cost_per_token": 5e-06, - "input_dbu_cost_per_token": 7.1429e-05, - "output_cost_per_token": 1.500002e-05, + "input_cost_per_token": 0.000005, + "input_dbu_cost_per_token": 0.000071429, + "output_cost_per_token": 0.00001500002, "output_db_cost_per_token": 0.000214286, "litellm_provider": "databricks", "mode": "chat", @@ -15876,10 +16567,10 @@ "max_tokens": 128000, "max_input_tokens": 128000, "max_output_tokens": 128000, - "input_cost_per_token": 1.00002e-06, - "input_dbu_cost_per_token": 1.4286e-05, - "output_cost_per_token": 2.99999e-06, - "output_dbu_cost_per_token": 
4.2857e-05, + "input_cost_per_token": 0.00000100002, + "input_dbu_cost_per_token": 0.000014286, + "output_cost_per_token": 0.00000299999, + "output_dbu_cost_per_token": 0.000042857, "litellm_provider": "databricks", "mode": "chat", "source": "https://www.databricks.com/product/pricing/foundation-model-serving", @@ -15892,10 +16583,10 @@ "max_tokens": 128000, "max_input_tokens": 128000, "max_output_tokens": 128000, - "input_cost_per_token": 1.00002e-06, - "input_dbu_cost_per_token": 1.4286e-05, - "output_cost_per_token": 2.99999e-06, - "output_dbu_cost_per_token": 4.2857e-05, + "input_cost_per_token": 0.00000100002, + "input_dbu_cost_per_token": 0.000014286, + "output_cost_per_token": 0.00000299999, + "output_dbu_cost_per_token": 0.000042857, "litellm_provider": "databricks", "mode": "chat", "source": "https://www.databricks.com/product/pricing/foundation-model-serving", @@ -15908,9 +16599,9 @@ "max_tokens": 128000, "max_input_tokens": 128000, "max_output_tokens": 128000, - "input_cost_per_token": 5e-06, - "input_dbu_cost_per_token": 7.143e-05, - "output_cost_per_token": 1.5e-05, + "input_cost_per_token": 0.000005, + "input_dbu_cost_per_token": 0.00007143, + "output_cost_per_token": 0.000015, "output_dbu_cost_per_token": 0.00021429, "litellm_provider": "databricks", "mode": "chat", @@ -15924,10 +16615,10 @@ "max_tokens": 32768, "max_input_tokens": 32768, "max_output_tokens": 32768, - "input_cost_per_token": 7.4998e-07, - "input_dbu_cost_per_token": 1.0714e-05, - "output_cost_per_token": 2.24901e-06, - "output_dbu_cost_per_token": 3.2143e-05, + "input_cost_per_token": 7.4998e-7, + "input_dbu_cost_per_token": 0.000010714, + "output_cost_per_token": 0.00000224901, + "output_dbu_cost_per_token": 0.000032143, "litellm_provider": "databricks", "mode": "chat", "source": "https://www.databricks.com/product/pricing/foundation-model-serving", @@ -15940,10 +16631,10 @@ "max_tokens": 128000, "max_input_tokens": 128000, "max_output_tokens": 128000, - "input_cost_per_token": 
1.00002e-06, - "input_dbu_cost_per_token": 1.4286e-05, - "output_cost_per_token": 2.99999e-06, - "output_dbu_cost_per_token": 4.2857e-05, + "input_cost_per_token": 0.00000100002, + "input_dbu_cost_per_token": 0.000014286, + "output_cost_per_token": 0.00000299999, + "output_dbu_cost_per_token": 0.000042857, "litellm_provider": "databricks", "mode": "chat", "source": "https://www.databricks.com/product/pricing/foundation-model-serving", @@ -15956,10 +16647,10 @@ "max_tokens": 4096, "max_input_tokens": 4096, "max_output_tokens": 4096, - "input_cost_per_token": 5.0001e-07, - "input_dbu_cost_per_token": 7.143e-06, - "output_cost_per_token": 1.5e-06, - "output_dbu_cost_per_token": 2.1429e-05, + "input_cost_per_token": 5.0001e-7, + "input_dbu_cost_per_token": 0.000007143, + "output_cost_per_token": 0.0000015, + "output_dbu_cost_per_token": 0.000021429, "litellm_provider": "databricks", "mode": "chat", "source": "https://www.databricks.com/product/pricing/foundation-model-serving", @@ -15972,10 +16663,10 @@ "max_tokens": 4096, "max_input_tokens": 4096, "max_output_tokens": 4096, - "input_cost_per_token": 5.0001e-07, - "input_dbu_cost_per_token": 7.143e-06, - "output_cost_per_token": 9.9902e-07, - "output_dbu_cost_per_token": 1.4286e-05, + "input_cost_per_token": 5.0001e-7, + "input_dbu_cost_per_token": 0.000007143, + "output_cost_per_token": 9.9902e-7, + "output_dbu_cost_per_token": 0.000014286, "litellm_provider": "databricks", "mode": "chat", "source": "https://www.databricks.com/product/pricing/foundation-model-serving", @@ -15988,10 +16679,10 @@ "max_tokens": 8192, "max_input_tokens": 8192, "max_output_tokens": 8192, - "input_cost_per_token": 9.9902e-07, - "input_dbu_cost_per_token": 1.4286e-05, - "output_cost_per_token": 9.9902e-07, - "output_dbu_cost_per_token": 1.4286e-05, + "input_cost_per_token": 9.9902e-7, + "input_dbu_cost_per_token": 0.000014286, + "output_cost_per_token": 9.9902e-7, + "output_dbu_cost_per_token": 0.000014286, "litellm_provider": "databricks", 
"mode": "chat", "source": "https://www.databricks.com/product/pricing/foundation-model-serving", @@ -16004,10 +16695,10 @@ "max_tokens": 8192, "max_input_tokens": 8192, "max_output_tokens": 8192, - "input_cost_per_token": 5.0001e-07, - "input_dbu_cost_per_token": 7.143e-06, - "output_cost_per_token": 0.0, - "output_dbu_cost_per_token": 0.0, + "input_cost_per_token": 5.0001e-7, + "input_dbu_cost_per_token": 0.000007143, + "output_cost_per_token": 0, + "output_dbu_cost_per_token": 0, "litellm_provider": "databricks", "mode": "chat", "source": "https://www.databricks.com/product/pricing/foundation-model-serving", @@ -16020,10 +16711,10 @@ "max_tokens": 512, "max_input_tokens": 512, "output_vector_size": 1024, - "input_cost_per_token": 1.0003e-07, - "input_dbu_cost_per_token": 1.429e-06, - "output_cost_per_token": 0.0, - "output_dbu_cost_per_token": 0.0, + "input_cost_per_token": 1.0003e-7, + "input_dbu_cost_per_token": 0.000001429, + "output_cost_per_token": 0, + "output_dbu_cost_per_token": 0, "litellm_provider": "databricks", "mode": "embedding", "source": "https://www.databricks.com/product/pricing/foundation-model-serving", @@ -16035,10 +16726,10 @@ "max_tokens": 8192, "max_input_tokens": 8192, "output_vector_size": 1024, - "input_cost_per_token": 1.2999e-07, - "input_dbu_cost_per_token": 1.857e-06, - "output_cost_per_token": 0.0, - "output_dbu_cost_per_token": 0.0, + "input_cost_per_token": 1.2999e-7, + "input_dbu_cost_per_token": 0.000001857, + "output_cost_per_token": 0, + "output_dbu_cost_per_token": 0, "litellm_provider": "databricks", "mode": "embedding", "source": "https://www.databricks.com/product/pricing/foundation-model-serving", @@ -16050,8 +16741,8 @@ "max_tokens": 16384, "max_input_tokens": 16384, "max_output_tokens": 16384, - "input_cost_per_token": 1e-07, - "output_cost_per_token": 2e-07, + "input_cost_per_token": 1e-7, + "output_cost_per_token": 2e-7, "litellm_provider": "sambanova", "mode": "chat", "supports_function_calling": true, @@ -16063,8 
+16754,8 @@ "max_tokens": 16384, "max_input_tokens": 16384, "max_output_tokens": 16384, - "input_cost_per_token": 5e-06, - "output_cost_per_token": 1e-05, + "input_cost_per_token": 0.000005, + "output_cost_per_token": 0.00001, "litellm_provider": "sambanova", "mode": "chat", "supports_function_calling": true, @@ -16076,8 +16767,8 @@ "max_tokens": 16384, "max_input_tokens": 16384, "max_output_tokens": 16384, - "input_cost_per_token": 4e-08, - "output_cost_per_token": 8e-08, + "input_cost_per_token": 4e-8, + "output_cost_per_token": 8e-8, "litellm_provider": "sambanova", "mode": "chat", "source": "https://cloud.sambanova.ai/plans/pricing" @@ -16086,8 +16777,8 @@ "max_tokens": 4096, "max_input_tokens": 4096, "max_output_tokens": 4096, - "input_cost_per_token": 8e-08, - "output_cost_per_token": 1.6e-07, + "input_cost_per_token": 8e-8, + "output_cost_per_token": 1.6e-7, "litellm_provider": "sambanova", "mode": "chat", "source": "https://cloud.sambanova.ai/plans/pricing" @@ -16096,8 +16787,8 @@ "max_tokens": 131072, "max_input_tokens": 131072, "max_output_tokens": 131072, - "input_cost_per_token": 6.3e-07, - "output_cost_per_token": 1.8e-06, + "input_cost_per_token": 6.3e-7, + "output_cost_per_token": 0.0000018, "litellm_provider": "sambanova", "mode": "chat", "supports_function_calling": true, @@ -16113,8 +16804,8 @@ "max_tokens": 8192, "max_input_tokens": 8192, "max_output_tokens": 8192, - "input_cost_per_token": 4e-07, - "output_cost_per_token": 7e-07, + "input_cost_per_token": 4e-7, + "output_cost_per_token": 7e-7, "litellm_provider": "sambanova", "mode": "chat", "supports_function_calling": true, @@ -16129,8 +16820,8 @@ "max_tokens": 131072, "max_input_tokens": 131072, "max_output_tokens": 131072, - "input_cost_per_token": 6e-07, - "output_cost_per_token": 1.2e-06, + "input_cost_per_token": 6e-7, + "output_cost_per_token": 0.0000012, "litellm_provider": "sambanova", "mode": "chat", "supports_function_calling": true, @@ -16142,8 +16833,8 @@ "max_tokens": 16384, 
"max_input_tokens": 16384, "max_output_tokens": 16384, - "input_cost_per_token": 3e-07, - "output_cost_per_token": 3e-07, + "input_cost_per_token": 3e-7, + "output_cost_per_token": 3e-7, "litellm_provider": "sambanova", "mode": "chat", "source": "https://cloud.sambanova.ai/plans/pricing" @@ -16152,8 +16843,8 @@ "max_tokens": 8192, "max_input_tokens": 8192, "max_output_tokens": 8192, - "input_cost_per_token": 4e-07, - "output_cost_per_token": 8e-07, + "input_cost_per_token": 4e-7, + "output_cost_per_token": 8e-7, "litellm_provider": "sambanova", "supports_function_calling": true, "supports_tool_choice": true, @@ -16165,8 +16856,8 @@ "max_tokens": 16384, "max_input_tokens": 16384, "max_output_tokens": 16384, - "input_cost_per_token": 5e-07, - "output_cost_per_token": 1e-06, + "input_cost_per_token": 5e-7, + "output_cost_per_token": 0.000001, "litellm_provider": "sambanova", "mode": "chat", "source": "https://cloud.sambanova.ai/plans/pricing" @@ -16175,7 +16866,7 @@ "max_tokens": 4096, "max_input_tokens": 4096, "max_output_tokens": 4096, - "input_cost_per_token": 5e-07, + "input_cost_per_token": 5e-7, "output_cost_per_token": 0.0001, "litellm_provider": "sambanova", "mode": "chat", @@ -16186,8 +16877,8 @@ "max_tokens": 131072, "max_input_tokens": 131072, "max_output_tokens": 131072, - "input_cost_per_token": 7e-07, - "output_cost_per_token": 1.4e-06, + "input_cost_per_token": 7e-7, + "output_cost_per_token": 0.0000014, "litellm_provider": "sambanova", "mode": "chat", "source": "https://cloud.sambanova.ai/plans/pricing" @@ -16196,8 +16887,8 @@ "max_tokens": 32768, "max_input_tokens": 32768, "max_output_tokens": 32768, - "input_cost_per_token": 5e-06, - "output_cost_per_token": 7e-06, + "input_cost_per_token": 0.000005, + "output_cost_per_token": 0.000007, "litellm_provider": "sambanova", "mode": "chat", "source": "https://cloud.sambanova.ai/plans/pricing" @@ -16206,8 +16897,8 @@ "max_tokens": 32768, "max_input_tokens": 32768, "max_output_tokens": 32768, - 
"input_cost_per_token": 3e-06, - "output_cost_per_token": 4.5e-06, + "input_cost_per_token": 0.000003, + "output_cost_per_token": 0.0000045, "litellm_provider": "sambanova", "mode": "chat", "supports_function_calling": true, @@ -16218,13 +16909,13 @@ "assemblyai/nano": { "mode": "audio_transcription", "input_cost_per_second": 0.00010278, - "output_cost_per_second": 0.0, + "output_cost_per_second": 0, "litellm_provider": "assemblyai" }, "assemblyai/best": { "mode": "audio_transcription", - "input_cost_per_second": 3.333e-05, - "output_cost_per_second": 0.0, + "input_cost_per_second": 0.00003333, + "output_cost_per_second": 0, "litellm_provider": "assemblyai" }, "jina-reranker-v2-base-multilingual": { @@ -16232,8 +16923,8 @@ "max_input_tokens": 1024, "max_output_tokens": 1024, "max_document_chunks_per_query": 2048, - "input_cost_per_token": 1.8e-08, - "output_cost_per_token": 1.8e-08, + "input_cost_per_token": 1.8e-8, + "output_cost_per_token": 1.8e-8, "litellm_provider": "jina_ai", "mode": "rerank" }, @@ -16407,44 +17098,220 @@ "litellm_provider": "snowflake", "mode": "chat" }, + "gradient_ai/anthropic-claude-3.7-sonnet": { + "input_cost_per_token": 0.000003, + "output_cost_per_token": 0.000015, + "litellm_provider": "gradient_ai", + "mode": "chat", + "max_tokens": 1024, + "supported_endpoints": [ + "/v1/chat/completions" + ], + "supported_modalities": [ + "text" + ], + "supports_tool_choice": false + }, + "gradient_ai/anthropic-claude-3.5-sonnet": { + "input_cost_per_token": 0.000003, + "output_cost_per_token": 0.000015, + "litellm_provider": "gradient_ai", + "mode": "chat", + "max_tokens": 1024, + "supported_endpoints": [ + "/v1/chat/completions" + ], + "supported_modalities": [ + "text" + ], + "supports_tool_choice": false + }, + "gradient_ai/anthropic-claude-3.5-haiku": { + "input_cost_per_token": 8e-7, + "output_cost_per_token": 0.000004, + "litellm_provider": "gradient_ai", + "mode": "chat", + "max_tokens": 1024, + "supported_endpoints": [ + 
"/v1/chat/completions" + ], + "supported_modalities": [ + "text" + ], + "supports_tool_choice": false + }, + "gradient_ai/anthropic-claude-3-opus": { + "input_cost_per_token": 0.000015, + "output_cost_per_token": 0.000075, + "litellm_provider": "gradient_ai", + "mode": "chat", + "max_tokens": 1024, + "supported_endpoints": [ + "/v1/chat/completions" + ], + "supported_modalities": [ + "text" + ], + "supports_tool_choice": false + }, + "gradient_ai/deepseek-r1-distill-llama-70b": { + "input_cost_per_token": 9.9e-7, + "output_cost_per_token": 9.9e-7, + "litellm_provider": "gradient_ai", + "mode": "chat", + "max_tokens": 8000, + "supported_endpoints": [ + "/v1/chat/completions" + ], + "supported_modalities": [ + "text" + ], + "supports_tool_choice": false + }, + "gradient_ai/llama3.3-70b-instruct": { + "input_cost_per_token": 6.5e-7, + "output_cost_per_token": 6.5e-7, + "litellm_provider": "gradient_ai", + "mode": "chat", + "max_tokens": 2048, + "supported_endpoints": [ + "/v1/chat/completions" + ], + "supported_modalities": [ + "text" + ], + "supports_tool_choice": false + }, + "gradient_ai/llama3-8b-instruct": { + "input_cost_per_token": 2e-7, + "output_cost_per_token": 2e-7, + "litellm_provider": "gradient_ai", + "mode": "chat", + "max_tokens": 512, + "supported_endpoints": [ + "/v1/chat/completions" + ], + "supported_modalities": [ + "text" + ], + "supports_tool_choice": false + }, + "gradient_ai/mistral-nemo-instruct-2407": { + "input_cost_per_token": 3e-7, + "output_cost_per_token": 3e-7, + "litellm_provider": "gradient_ai", + "mode": "chat", + "max_tokens": 512, + "supported_endpoints": [ + "/v1/chat/completions" + ], + "supported_modalities": [ + "text" + ], + "supports_tool_choice": false + }, + "gradient_ai/openai-o3": { + "input_cost_per_token": 0.000002, + "output_cost_per_token": 0.000008, + "litellm_provider": "gradient_ai", + "mode": "chat", + "max_tokens": 100000, + "supported_endpoints": [ + "/v1/chat/completions" + ], + "supported_modalities": [ + 
"text" + ], + "supports_tool_choice": false + }, + "gradient_ai/openai-o3-mini": { + "input_cost_per_token": 0.0000011, + "output_cost_per_token": 0.0000044, + "litellm_provider": "gradient_ai", + "mode": "chat", + "max_tokens": 100000, + "supported_endpoints": [ + "/v1/chat/completions" + ], + "supported_modalities": [ + "text" + ], + "supports_tool_choice": false + }, + "gradient_ai/openai-gpt-4o": { + "litellm_provider": "gradient_ai", + "mode": "chat", + "max_tokens": 16384, + "supported_endpoints": [ + "/v1/chat/completions" + ], + "supported_modalities": [ + "text" + ], + "supports_tool_choice": false + }, + "gradient_ai/openai-gpt-4o-mini": { + "litellm_provider": "gradient_ai", + "mode": "chat", + "max_tokens": 16384, + "supported_endpoints": [ + "/v1/chat/completions" + ], + "supported_modalities": [ + "text" + ], + "supports_tool_choice": false + }, + "gradient_ai/alibaba-qwen3-32b": { + "litellm_provider": "gradient_ai", + "mode": "chat", + "max_tokens": 2048, + "supported_endpoints": [ + "/v1/chat/completions" + ], + "supported_modalities": [ + "text" + ], + "supports_tool_choice": false + }, "nscale/meta-llama/Llama-4-Scout-17B-16E-Instruct": { - "input_cost_per_token": 9e-08, - "output_cost_per_token": 2.9e-07, + "input_cost_per_token": 9e-8, + "output_cost_per_token": 2.9e-7, "litellm_provider": "nscale", "mode": "chat", "source": "https://docs.nscale.com/docs/inference/serverless-models/current#chat-models" }, "nscale/Qwen/Qwen2.5-Coder-3B-Instruct": { - "input_cost_per_token": 1e-08, - "output_cost_per_token": 3e-08, + "input_cost_per_token": 1e-8, + "output_cost_per_token": 3e-8, "litellm_provider": "nscale", "mode": "chat", "source": "https://docs.nscale.com/docs/inference/serverless-models/current#chat-models" }, "nscale/Qwen/Qwen2.5-Coder-7B-Instruct": { - "input_cost_per_token": 1e-08, - "output_cost_per_token": 3e-08, + "input_cost_per_token": 1e-8, + "output_cost_per_token": 3e-8, "litellm_provider": "nscale", "mode": "chat", "source": 
"https://docs.nscale.com/docs/inference/serverless-models/current#chat-models" }, "nscale/Qwen/Qwen2.5-Coder-32B-Instruct": { - "input_cost_per_token": 6e-08, - "output_cost_per_token": 2e-07, + "input_cost_per_token": 6e-8, + "output_cost_per_token": 2e-7, "litellm_provider": "nscale", "mode": "chat", "source": "https://docs.nscale.com/docs/inference/serverless-models/current#chat-models" }, "nscale/Qwen/QwQ-32B": { - "input_cost_per_token": 1.8e-07, - "output_cost_per_token": 2e-07, + "input_cost_per_token": 1.8e-7, + "output_cost_per_token": 2e-7, "litellm_provider": "nscale", "mode": "chat", "source": "https://docs.nscale.com/docs/inference/serverless-models/current#chat-models" }, "nscale/deepseek-ai/DeepSeek-R1-Distill-Llama-70B": { - "input_cost_per_token": 3.75e-07, - "output_cost_per_token": 3.75e-07, + "input_cost_per_token": 3.75e-7, + "output_cost_per_token": 3.75e-7, "litellm_provider": "nscale", "mode": "chat", "source": "https://docs.nscale.com/docs/inference/serverless-models/current#chat-models", @@ -16453,8 +17320,8 @@ } }, "nscale/deepseek-ai/DeepSeek-R1-Distill-Llama-8B": { - "input_cost_per_token": 2.5e-08, - "output_cost_per_token": 2.5e-08, + "input_cost_per_token": 2.5e-8, + "output_cost_per_token": 2.5e-8, "litellm_provider": "nscale", "mode": "chat", "source": "https://docs.nscale.com/docs/inference/serverless-models/current#chat-models", @@ -16463,8 +17330,8 @@ } }, "nscale/deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B": { - "input_cost_per_token": 9e-08, - "output_cost_per_token": 9e-08, + "input_cost_per_token": 9e-8, + "output_cost_per_token": 9e-8, "litellm_provider": "nscale", "mode": "chat", "source": "https://docs.nscale.com/docs/inference/serverless-models/current#chat-models", @@ -16473,8 +17340,8 @@ } }, "nscale/deepseek-ai/DeepSeek-R1-Distill-Qwen-7B": { - "input_cost_per_token": 2e-07, - "output_cost_per_token": 2e-07, + "input_cost_per_token": 2e-7, + "output_cost_per_token": 2e-7, "litellm_provider": "nscale", "mode": "chat", 
"source": "https://docs.nscale.com/docs/inference/serverless-models/current#chat-models", @@ -16483,8 +17350,8 @@ } }, "nscale/deepseek-ai/DeepSeek-R1-Distill-Qwen-14B": { - "input_cost_per_token": 7e-08, - "output_cost_per_token": 7e-08, + "input_cost_per_token": 7e-8, + "output_cost_per_token": 7e-8, "litellm_provider": "nscale", "mode": "chat", "source": "https://docs.nscale.com/docs/inference/serverless-models/current#chat-models", @@ -16493,8 +17360,8 @@ } }, "nscale/deepseek-ai/DeepSeek-R1-Distill-Qwen-32B": { - "input_cost_per_token": 1.5e-07, - "output_cost_per_token": 1.5e-07, + "input_cost_per_token": 1.5e-7, + "output_cost_per_token": 1.5e-7, "litellm_provider": "nscale", "mode": "chat", "source": "https://docs.nscale.com/docs/inference/serverless-models/current#chat-models", @@ -16503,8 +17370,8 @@ } }, "nscale/mistralai/mixtral-8x22b-instruct-v0.1": { - "input_cost_per_token": 6e-07, - "output_cost_per_token": 6e-07, + "input_cost_per_token": 6e-7, + "output_cost_per_token": 6e-7, "litellm_provider": "nscale", "mode": "chat", "source": "https://docs.nscale.com/docs/inference/serverless-models/current#chat-models", @@ -16513,8 +17380,8 @@ } }, "nscale/meta-llama/Llama-3.1-8B-Instruct": { - "input_cost_per_token": 3e-08, - "output_cost_per_token": 3e-08, + "input_cost_per_token": 3e-8, + "output_cost_per_token": 3e-8, "litellm_provider": "nscale", "mode": "chat", "source": "https://docs.nscale.com/docs/inference/serverless-models/current#chat-models", @@ -16523,8 +17390,8 @@ } }, "nscale/meta-llama/Llama-3.3-70B-Instruct": { - "input_cost_per_token": 2e-07, - "output_cost_per_token": 2e-07, + "input_cost_per_token": 2e-7, + "output_cost_per_token": 2e-7, "litellm_provider": "nscale", "mode": "chat", "source": "https://docs.nscale.com/docs/inference/serverless-models/current#chat-models", @@ -16534,8 +17401,8 @@ }, "nscale/black-forest-labs/FLUX.1-schnell": { "mode": "image_generation", - "input_cost_per_pixel": 1.3e-09, - "output_cost_per_pixel": 0.0, + 
"input_cost_per_pixel": 1.3e-9, + "output_cost_per_pixel": 0, "litellm_provider": "nscale", "supported_endpoints": [ "/v1/images/generations" @@ -16544,8 +17411,8 @@ }, "nscale/stabilityai/stable-diffusion-xl-base-1.0": { "mode": "image_generation", - "input_cost_per_pixel": 3e-09, - "output_cost_per_pixel": 0.0, + "input_cost_per_pixel": 3e-9, + "output_cost_per_pixel": 0, "litellm_provider": "nscale", "supported_endpoints": [ "/v1/images/generations" @@ -16568,8 +17435,8 @@ }, "deepgram/nova-3": { "mode": "audio_transcription", - "input_cost_per_second": 7.167e-05, - "output_cost_per_second": 0.0, + "input_cost_per_second": 0.00007167, + "output_cost_per_second": 0, "litellm_provider": "deepgram", "supported_endpoints": [ "/v1/audio/transcriptions" @@ -16582,8 +17449,8 @@ }, "deepgram/nova-3-general": { "mode": "audio_transcription", - "input_cost_per_second": 7.167e-05, - "output_cost_per_second": 0.0, + "input_cost_per_second": 0.00007167, + "output_cost_per_second": 0, "litellm_provider": "deepgram", "supported_endpoints": [ "/v1/audio/transcriptions" @@ -16596,8 +17463,8 @@ }, "deepgram/nova-3-medical": { "mode": "audio_transcription", - "input_cost_per_second": 8.667e-05, - "output_cost_per_second": 0.0, + "input_cost_per_second": 0.00008667, + "output_cost_per_second": 0, "litellm_provider": "deepgram", "supported_endpoints": [ "/v1/audio/transcriptions" @@ -16610,8 +17477,8 @@ }, "deepgram/nova-2": { "mode": "audio_transcription", - "input_cost_per_second": 7.167e-05, - "output_cost_per_second": 0.0, + "input_cost_per_second": 0.00007167, + "output_cost_per_second": 0, "litellm_provider": "deepgram", "supported_endpoints": [ "/v1/audio/transcriptions" @@ -16624,8 +17491,8 @@ }, "deepgram/nova-2-general": { "mode": "audio_transcription", - "input_cost_per_second": 7.167e-05, - "output_cost_per_second": 0.0, + "input_cost_per_second": 0.00007167, + "output_cost_per_second": 0, "litellm_provider": "deepgram", "supported_endpoints": [ 
"/v1/audio/transcriptions" @@ -16638,8 +17505,8 @@ }, "deepgram/nova-2-meeting": { "mode": "audio_transcription", - "input_cost_per_second": 7.167e-05, - "output_cost_per_second": 0.0, + "input_cost_per_second": 0.00007167, + "output_cost_per_second": 0, "litellm_provider": "deepgram", "supported_endpoints": [ "/v1/audio/transcriptions" @@ -16652,8 +17519,8 @@ }, "deepgram/nova-2-phonecall": { "mode": "audio_transcription", - "input_cost_per_second": 7.167e-05, - "output_cost_per_second": 0.0, + "input_cost_per_second": 0.00007167, + "output_cost_per_second": 0, "litellm_provider": "deepgram", "supported_endpoints": [ "/v1/audio/transcriptions" @@ -16666,8 +17533,8 @@ }, "deepgram/nova-2-voicemail": { "mode": "audio_transcription", - "input_cost_per_second": 7.167e-05, - "output_cost_per_second": 0.0, + "input_cost_per_second": 0.00007167, + "output_cost_per_second": 0, "litellm_provider": "deepgram", "supported_endpoints": [ "/v1/audio/transcriptions" @@ -16680,8 +17547,8 @@ }, "deepgram/nova-2-finance": { "mode": "audio_transcription", - "input_cost_per_second": 7.167e-05, - "output_cost_per_second": 0.0, + "input_cost_per_second": 0.00007167, + "output_cost_per_second": 0, "litellm_provider": "deepgram", "supported_endpoints": [ "/v1/audio/transcriptions" @@ -16694,8 +17561,8 @@ }, "deepgram/nova-2-conversationalai": { "mode": "audio_transcription", - "input_cost_per_second": 7.167e-05, - "output_cost_per_second": 0.0, + "input_cost_per_second": 0.00007167, + "output_cost_per_second": 0, "litellm_provider": "deepgram", "supported_endpoints": [ "/v1/audio/transcriptions" @@ -16708,8 +17575,8 @@ }, "deepgram/nova-2-video": { "mode": "audio_transcription", - "input_cost_per_second": 7.167e-05, - "output_cost_per_second": 0.0, + "input_cost_per_second": 0.00007167, + "output_cost_per_second": 0, "litellm_provider": "deepgram", "supported_endpoints": [ "/v1/audio/transcriptions" @@ -16722,8 +17589,8 @@ }, "deepgram/nova-2-drivethru": { "mode": "audio_transcription", 
- "input_cost_per_second": 7.167e-05, - "output_cost_per_second": 0.0, + "input_cost_per_second": 0.00007167, + "output_cost_per_second": 0, "litellm_provider": "deepgram", "supported_endpoints": [ "/v1/audio/transcriptions" @@ -16736,8 +17603,8 @@ }, "deepgram/nova-2-automotive": { "mode": "audio_transcription", - "input_cost_per_second": 7.167e-05, - "output_cost_per_second": 0.0, + "input_cost_per_second": 0.00007167, + "output_cost_per_second": 0, "litellm_provider": "deepgram", "supported_endpoints": [ "/v1/audio/transcriptions" @@ -16750,8 +17617,8 @@ }, "deepgram/nova-2-atc": { "mode": "audio_transcription", - "input_cost_per_second": 7.167e-05, - "output_cost_per_second": 0.0, + "input_cost_per_second": 0.00007167, + "output_cost_per_second": 0, "litellm_provider": "deepgram", "supported_endpoints": [ "/v1/audio/transcriptions" @@ -16764,8 +17631,8 @@ }, "deepgram/nova": { "mode": "audio_transcription", - "input_cost_per_second": 7.167e-05, - "output_cost_per_second": 0.0, + "input_cost_per_second": 0.00007167, + "output_cost_per_second": 0, "litellm_provider": "deepgram", "supported_endpoints": [ "/v1/audio/transcriptions" @@ -16778,8 +17645,8 @@ }, "deepgram/nova-general": { "mode": "audio_transcription", - "input_cost_per_second": 7.167e-05, - "output_cost_per_second": 0.0, + "input_cost_per_second": 0.00007167, + "output_cost_per_second": 0, "litellm_provider": "deepgram", "supported_endpoints": [ "/v1/audio/transcriptions" @@ -16792,8 +17659,8 @@ }, "deepgram/nova-phonecall": { "mode": "audio_transcription", - "input_cost_per_second": 7.167e-05, - "output_cost_per_second": 0.0, + "input_cost_per_second": 0.00007167, + "output_cost_per_second": 0, "litellm_provider": "deepgram", "supported_endpoints": [ "/v1/audio/transcriptions" @@ -16807,7 +17674,7 @@ "deepgram/enhanced": { "mode": "audio_transcription", "input_cost_per_second": 0.00024167, - "output_cost_per_second": 0.0, + "output_cost_per_second": 0, "litellm_provider": "deepgram", 
"supported_endpoints": [ "/v1/audio/transcriptions" @@ -16821,7 +17688,7 @@ "deepgram/enhanced-general": { "mode": "audio_transcription", "input_cost_per_second": 0.00024167, - "output_cost_per_second": 0.0, + "output_cost_per_second": 0, "litellm_provider": "deepgram", "supported_endpoints": [ "/v1/audio/transcriptions" @@ -16835,7 +17702,7 @@ "deepgram/enhanced-meeting": { "mode": "audio_transcription", "input_cost_per_second": 0.00024167, - "output_cost_per_second": 0.0, + "output_cost_per_second": 0, "litellm_provider": "deepgram", "supported_endpoints": [ "/v1/audio/transcriptions" @@ -16849,7 +17716,7 @@ "deepgram/enhanced-phonecall": { "mode": "audio_transcription", "input_cost_per_second": 0.00024167, - "output_cost_per_second": 0.0, + "output_cost_per_second": 0, "litellm_provider": "deepgram", "supported_endpoints": [ "/v1/audio/transcriptions" @@ -16863,7 +17730,7 @@ "deepgram/enhanced-finance": { "mode": "audio_transcription", "input_cost_per_second": 0.00024167, - "output_cost_per_second": 0.0, + "output_cost_per_second": 0, "litellm_provider": "deepgram", "supported_endpoints": [ "/v1/audio/transcriptions" @@ -16877,7 +17744,7 @@ "deepgram/base": { "mode": "audio_transcription", "input_cost_per_second": 0.00020833, - "output_cost_per_second": 0.0, + "output_cost_per_second": 0, "litellm_provider": "deepgram", "supported_endpoints": [ "/v1/audio/transcriptions" @@ -16891,7 +17758,7 @@ "deepgram/base-general": { "mode": "audio_transcription", "input_cost_per_second": 0.00020833, - "output_cost_per_second": 0.0, + "output_cost_per_second": 0, "litellm_provider": "deepgram", "supported_endpoints": [ "/v1/audio/transcriptions" @@ -16905,7 +17772,7 @@ "deepgram/base-meeting": { "mode": "audio_transcription", "input_cost_per_second": 0.00020833, - "output_cost_per_second": 0.0, + "output_cost_per_second": 0, "litellm_provider": "deepgram", "supported_endpoints": [ "/v1/audio/transcriptions" @@ -16919,7 +17786,7 @@ "deepgram/base-phonecall": { "mode": 
"audio_transcription", "input_cost_per_second": 0.00020833, - "output_cost_per_second": 0.0, + "output_cost_per_second": 0, "litellm_provider": "deepgram", "supported_endpoints": [ "/v1/audio/transcriptions" @@ -16933,7 +17800,7 @@ "deepgram/base-voicemail": { "mode": "audio_transcription", "input_cost_per_second": 0.00020833, - "output_cost_per_second": 0.0, + "output_cost_per_second": 0, "litellm_provider": "deepgram", "supported_endpoints": [ "/v1/audio/transcriptions" @@ -16947,7 +17814,7 @@ "deepgram/base-finance": { "mode": "audio_transcription", "input_cost_per_second": 0.00020833, - "output_cost_per_second": 0.0, + "output_cost_per_second": 0, "litellm_provider": "deepgram", "supported_endpoints": [ "/v1/audio/transcriptions" @@ -16961,7 +17828,7 @@ "deepgram/base-conversationalai": { "mode": "audio_transcription", "input_cost_per_second": 0.00020833, - "output_cost_per_second": 0.0, + "output_cost_per_second": 0, "litellm_provider": "deepgram", "supported_endpoints": [ "/v1/audio/transcriptions" @@ -16975,7 +17842,7 @@ "deepgram/base-video": { "mode": "audio_transcription", "input_cost_per_second": 0.00020833, - "output_cost_per_second": 0.0, + "output_cost_per_second": 0, "litellm_provider": "deepgram", "supported_endpoints": [ "/v1/audio/transcriptions" @@ -16989,7 +17856,7 @@ "deepgram/whisper": { "mode": "audio_transcription", "input_cost_per_second": 0.0001, - "output_cost_per_second": 0.0, + "output_cost_per_second": 0, "litellm_provider": "deepgram", "supported_endpoints": [ "/v1/audio/transcriptions" @@ -17002,7 +17869,7 @@ "deepgram/whisper-tiny": { "mode": "audio_transcription", "input_cost_per_second": 0.0001, - "output_cost_per_second": 0.0, + "output_cost_per_second": 0, "litellm_provider": "deepgram", "supported_endpoints": [ "/v1/audio/transcriptions" @@ -17015,7 +17882,7 @@ "deepgram/whisper-base": { "mode": "audio_transcription", "input_cost_per_second": 0.0001, - "output_cost_per_second": 0.0, + "output_cost_per_second": 0, 
"litellm_provider": "deepgram", "supported_endpoints": [ "/v1/audio/transcriptions" @@ -17028,7 +17895,7 @@ "deepgram/whisper-small": { "mode": "audio_transcription", "input_cost_per_second": 0.0001, - "output_cost_per_second": 0.0, + "output_cost_per_second": 0, "litellm_provider": "deepgram", "supported_endpoints": [ "/v1/audio/transcriptions" @@ -17041,7 +17908,7 @@ "deepgram/whisper-medium": { "mode": "audio_transcription", "input_cost_per_second": 0.0001, - "output_cost_per_second": 0.0, + "output_cost_per_second": 0, "litellm_provider": "deepgram", "supported_endpoints": [ "/v1/audio/transcriptions" @@ -17054,7 +17921,7 @@ "deepgram/whisper-large": { "mode": "audio_transcription", "input_cost_per_second": 0.0001, - "output_cost_per_second": 0.0, + "output_cost_per_second": 0, "litellm_provider": "deepgram", "supported_endpoints": [ "/v1/audio/transcriptions" @@ -17066,8 +17933,8 @@ }, "elevenlabs/scribe_v1": { "mode": "audio_transcription", - "input_cost_per_second": 6.11e-05, - "output_cost_per_second": 0.0, + "input_cost_per_second": 0.0000611, + "output_cost_per_second": 0, "litellm_provider": "elevenlabs", "supported_endpoints": [ "/v1/audio/transcriptions" @@ -17081,8 +17948,8 @@ }, "elevenlabs/scribe_v1_experimental": { "mode": "audio_transcription", - "input_cost_per_second": 6.11e-05, - "output_cost_per_second": 0.0, + "input_cost_per_second": 0.0000611, + "output_cost_per_second": 0, "litellm_provider": "elevenlabs", "supported_endpoints": [ "/v1/audio/transcriptions" @@ -17098,8 +17965,8 @@ "max_tokens": 8192, "max_input_tokens": 8192, "output_vector_size": 1536, - "input_cost_per_token": 1e-07, - "output_cost_per_token": 0.0, + "input_cost_per_token": 1e-7, + "output_cost_per_token": 0, "litellm_provider": "bedrock", "mode": "embedding" }, @@ -17107,8 +17974,8 @@ "max_tokens": 8192, "max_input_tokens": 8192, "output_vector_size": 1024, - "input_cost_per_token": 2e-07, - "output_cost_per_token": 0.0, + "input_cost_per_token": 2e-7, + 
"output_cost_per_token": 0, "litellm_provider": "bedrock", "mode": "embedding" }, @@ -17116,8 +17983,8 @@ "max_tokens": 8000, "max_input_tokens": 42000, "max_output_tokens": 8000, - "input_cost_per_token": 1.3e-06, - "output_cost_per_token": 1.7e-06, + "input_cost_per_token": 0.0000013, + "output_cost_per_token": 0.0000017, "litellm_provider": "bedrock", "mode": "chat" }, @@ -17125,8 +17992,8 @@ "max_tokens": 4000, "max_input_tokens": 42000, "max_output_tokens": 4000, - "input_cost_per_token": 3e-07, - "output_cost_per_token": 4e-07, + "input_cost_per_token": 3e-7, + "output_cost_per_token": 4e-7, "litellm_provider": "bedrock", "mode": "chat" }, @@ -17134,8 +18001,8 @@ "max_tokens": 32000, "max_input_tokens": 42000, "max_output_tokens": 32000, - "input_cost_per_token": 5e-07, - "output_cost_per_token": 1.5e-06, + "input_cost_per_token": 5e-7, + "output_cost_per_token": 0.0000015, "litellm_provider": "bedrock", "mode": "chat" }, @@ -17143,8 +18010,8 @@ "max_tokens": 8192, "max_input_tokens": 200000, "max_output_tokens": 8192, - "input_cost_per_token": 3.6e-06, - "output_cost_per_token": 1.8e-05, + "input_cost_per_token": 0.0000036, + "output_cost_per_token": 0.000018, "litellm_provider": "bedrock", "mode": "chat", "supports_function_calling": true, @@ -17157,8 +18024,8 @@ "max_tokens": 4096, "max_input_tokens": 200000, "max_output_tokens": 4096, - "input_cost_per_token": 3e-07, - "output_cost_per_token": 1.5e-06, + "input_cost_per_token": 3e-7, + "output_cost_per_token": 0.0000015, "litellm_provider": "bedrock", "mode": "chat", "supports_function_calling": true, @@ -17171,8 +18038,8 @@ "max_tokens": 2048, "max_input_tokens": 8000, "max_output_tokens": 2048, - "input_cost_per_token": 2.65e-06, - "output_cost_per_token": 3.5e-06, + "input_cost_per_token": 0.00000265, + "output_cost_per_token": 0.0000035, "litellm_provider": "bedrock", "mode": "chat", "supports_pdf_input": true @@ -17181,8 +18048,8 @@ "max_tokens": 2048, "max_input_tokens": 8000, "max_output_tokens": 
2048, - "input_cost_per_token": 3e-07, - "output_cost_per_token": 2.65e-06, + "input_cost_per_token": 3e-7, + "output_cost_per_token": 0.00000265, "litellm_provider": "bedrock", "mode": "chat", "supports_pdf_input": true @@ -17191,8 +18058,8 @@ "max_tokens": 8192, "max_input_tokens": 8192, "output_vector_size": 1536, - "input_cost_per_token": 1e-07, - "output_cost_per_token": 0.0, + "input_cost_per_token": 1e-7, + "output_cost_per_token": 0, "litellm_provider": "bedrock", "mode": "embedding" }, @@ -17200,8 +18067,8 @@ "max_tokens": 8192, "max_input_tokens": 8192, "output_vector_size": 1024, - "input_cost_per_token": 2e-07, - "output_cost_per_token": 0.0, + "input_cost_per_token": 2e-7, + "output_cost_per_token": 0, "litellm_provider": "bedrock", "mode": "embedding" }, @@ -17209,8 +18076,8 @@ "max_tokens": 8000, "max_input_tokens": 42000, "max_output_tokens": 8000, - "input_cost_per_token": 1.3e-06, - "output_cost_per_token": 1.7e-06, + "input_cost_per_token": 0.0000013, + "output_cost_per_token": 0.0000017, "litellm_provider": "bedrock", "mode": "chat" }, @@ -17218,8 +18085,8 @@ "max_tokens": 4000, "max_input_tokens": 42000, "max_output_tokens": 4000, - "input_cost_per_token": 3e-07, - "output_cost_per_token": 4e-07, + "input_cost_per_token": 3e-7, + "output_cost_per_token": 4e-7, "litellm_provider": "bedrock", "mode": "chat" }, @@ -17227,8 +18094,8 @@ "max_tokens": 32000, "max_input_tokens": 42000, "max_output_tokens": 32000, - "input_cost_per_token": 5e-07, - "output_cost_per_token": 1.5e-06, + "input_cost_per_token": 5e-7, + "output_cost_per_token": 0.0000015, "litellm_provider": "bedrock", "mode": "chat" }, @@ -17236,8 +18103,8 @@ "max_tokens": 8192, "max_input_tokens": 200000, "max_output_tokens": 8192, - "input_cost_per_token": 3.6e-06, - "output_cost_per_token": 1.8e-05, + "input_cost_per_token": 0.0000036, + "output_cost_per_token": 0.000018, "litellm_provider": "bedrock", "mode": "chat", "supports_function_calling": true, @@ -17250,8 +18117,8 @@ 
"max_tokens": 4096, "max_input_tokens": 200000, "max_output_tokens": 4096, - "input_cost_per_token": 3e-07, - "output_cost_per_token": 1.5e-06, + "input_cost_per_token": 3e-7, + "output_cost_per_token": 0.0000015, "litellm_provider": "bedrock", "mode": "chat", "supports_function_calling": true, @@ -17264,8 +18131,8 @@ "max_tokens": 2048, "max_input_tokens": 8000, "max_output_tokens": 2048, - "input_cost_per_token": 2.65e-06, - "output_cost_per_token": 3.5e-06, + "input_cost_per_token": 0.00000265, + "output_cost_per_token": 0.0000035, "litellm_provider": "bedrock", "mode": "chat", "supports_pdf_input": true @@ -17274,8 +18141,8 @@ "max_tokens": 2048, "max_input_tokens": 8000, "max_output_tokens": 2048, - "input_cost_per_token": 3e-07, - "output_cost_per_token": 2.65e-06, + "input_cost_per_token": 3e-7, + "output_cost_per_token": 0.00000265, "litellm_provider": "bedrock", "mode": "chat", "supports_pdf_input": true @@ -17284,8 +18151,8 @@ "max_tokens": 10000, "max_input_tokens": 300000, "max_output_tokens": 10000, - "input_cost_per_token": 9.6e-07, - "output_cost_per_token": 3.84e-06, + "input_cost_per_token": 9.6e-7, + "output_cost_per_token": 0.00000384, "litellm_provider": "bedrock", "mode": "chat", "supports_function_calling": true, @@ -17298,8 +18165,8 @@ "max_tokens": 10000, "max_input_tokens": 300000, "max_output_tokens": 10000, - "input_cost_per_token": 9.6e-07, - "output_cost_per_token": 3.84e-06, + "input_cost_per_token": 9.6e-7, + "output_cost_per_token": 0.00000384, "litellm_provider": "bedrock", "mode": "chat", "supports_function_calling": true, @@ -17356,8 +18223,8 @@ "max_tokens": 8192, "max_input_tokens": 8192, "max_output_tokens": 8192, - "input_cost_per_token": 2e-07, - "output_cost_per_token": 2e-06, + "input_cost_per_token": 2e-7, + "output_cost_per_token": 0.000002, "litellm_provider": "moonshot", "supports_function_calling": true, "supports_tool_choice": true, @@ -17368,8 +18235,8 @@ "max_tokens": 32768, "max_input_tokens": 32768, 
"max_output_tokens": 32768, - "input_cost_per_token": 1e-06, - "output_cost_per_token": 3e-06, + "input_cost_per_token": 0.000001, + "output_cost_per_token": 0.000003, "litellm_provider": "moonshot", "supports_function_calling": true, "supports_tool_choice": true, @@ -17380,8 +18247,8 @@ "max_tokens": 131072, "max_input_tokens": 131072, "max_output_tokens": 131072, - "input_cost_per_token": 2e-06, - "output_cost_per_token": 5e-06, + "input_cost_per_token": 0.000002, + "output_cost_per_token": 0.000005, "litellm_provider": "moonshot", "supports_function_calling": true, "supports_tool_choice": true, @@ -17392,8 +18259,8 @@ "max_tokens": 131072, "max_input_tokens": 131072, "max_output_tokens": 131072, - "input_cost_per_token": 2e-06, - "output_cost_per_token": 5e-06, + "input_cost_per_token": 0.000002, + "output_cost_per_token": 0.000005, "litellm_provider": "moonshot", "supports_function_calling": true, "supports_tool_choice": true, @@ -17404,9 +18271,9 @@ "max_tokens": 131072, "max_input_tokens": 131072, "max_output_tokens": 131072, - "input_cost_per_token": 6e-07, - "output_cost_per_token": 2.5e-06, - "cache_read_input_token_cost": 1.5e-07, + "input_cost_per_token": 6e-7, + "output_cost_per_token": 0.0000025, + "cache_read_input_token_cost": 1.5e-7, "litellm_provider": "moonshot", "supports_function_calling": true, "supports_tool_choice": true, @@ -17418,8 +18285,8 @@ "max_tokens": 32768, "max_input_tokens": 32768, "max_output_tokens": 32768, - "input_cost_per_token": 1e-06, - "output_cost_per_token": 3e-06, + "input_cost_per_token": 0.000001, + "output_cost_per_token": 0.000003, "litellm_provider": "moonshot", "supports_function_calling": true, "supports_tool_choice": true, @@ -17430,8 +18297,8 @@ "max_tokens": 131072, "max_input_tokens": 131072, "max_output_tokens": 131072, - "input_cost_per_token": 2e-06, - "output_cost_per_token": 5e-06, + "input_cost_per_token": 0.000002, + "output_cost_per_token": 0.000005, "litellm_provider": "moonshot", 
"supports_function_calling": true, "supports_tool_choice": true, @@ -17442,8 +18309,8 @@ "max_tokens": 8192, "max_input_tokens": 8192, "max_output_tokens": 8192, - "input_cost_per_token": 2e-07, - "output_cost_per_token": 2e-06, + "input_cost_per_token": 2e-7, + "output_cost_per_token": 0.000002, "litellm_provider": "moonshot", "supports_function_calling": true, "supports_tool_choice": true, @@ -17454,9 +18321,9 @@ "max_tokens": 131072, "max_input_tokens": 131072, "max_output_tokens": 131072, - "input_cost_per_token": 2e-06, - "output_cost_per_token": 5e-06, - "cache_read_input_token_cost": 1.5e-07, + "input_cost_per_token": 0.000002, + "output_cost_per_token": 0.000005, + "cache_read_input_token_cost": 1.5e-7, "litellm_provider": "moonshot", "supports_function_calling": true, "supports_tool_choice": true, @@ -17468,9 +18335,9 @@ "max_tokens": 8192, "max_input_tokens": 8192, "max_output_tokens": 8192, - "input_cost_per_token": 2e-07, - "output_cost_per_token": 2e-06, - "cache_read_input_token_cost": 1.5e-07, + "input_cost_per_token": 2e-7, + "output_cost_per_token": 0.000002, + "cache_read_input_token_cost": 1.5e-7, "litellm_provider": "moonshot", "supports_function_calling": true, "supports_tool_choice": true, @@ -17482,9 +18349,9 @@ "max_tokens": 32768, "max_input_tokens": 32768, "max_output_tokens": 32768, - "input_cost_per_token": 1e-06, - "output_cost_per_token": 3e-06, - "cache_read_input_token_cost": 1.5e-07, + "input_cost_per_token": 0.000001, + "output_cost_per_token": 0.000003, + "cache_read_input_token_cost": 1.5e-7, "litellm_provider": "moonshot", "supports_function_calling": true, "supports_tool_choice": true, @@ -17496,9 +18363,9 @@ "max_tokens": 131072, "max_input_tokens": 131072, "max_output_tokens": 131072, - "input_cost_per_token": 2e-06, - "output_cost_per_token": 5e-06, - "cache_read_input_token_cost": 1.5e-07, + "input_cost_per_token": 0.000002, + "output_cost_per_token": 0.000005, + "cache_read_input_token_cost": 1.5e-7, "litellm_provider": 
"moonshot", "supports_function_calling": true, "supports_tool_choice": true, @@ -17510,8 +18377,8 @@ "max_tokens": 131072, "max_input_tokens": 131072, "max_output_tokens": 131072, - "input_cost_per_token": 3e-05, - "output_cost_per_token": 3e-05, + "input_cost_per_token": 0.00003, + "output_cost_per_token": 0.00003, "litellm_provider": "moonshot", "supports_vision": true, "mode": "chat", @@ -17521,8 +18388,8 @@ "max_tokens": 8192, "max_input_tokens": 8192, "max_output_tokens": 8192, - "input_cost_per_token": 2e-07, - "output_cost_per_token": 2e-06, + "input_cost_per_token": 2e-7, + "output_cost_per_token": 0.000002, "litellm_provider": "moonshot", "supports_function_calling": true, "supports_tool_choice": true, @@ -17534,8 +18401,8 @@ "max_tokens": 32768, "max_input_tokens": 32768, "max_output_tokens": 32768, - "input_cost_per_token": 1e-06, - "output_cost_per_token": 3e-06, + "input_cost_per_token": 0.000001, + "output_cost_per_token": 0.000003, "litellm_provider": "moonshot", "supports_function_calling": true, "supports_tool_choice": true, @@ -17547,8 +18414,8 @@ "max_tokens": 131072, "max_input_tokens": 131072, "max_output_tokens": 131072, - "input_cost_per_token": 2e-06, - "output_cost_per_token": 5e-06, + "input_cost_per_token": 0.000002, + "output_cost_per_token": 0.000005, "litellm_provider": "moonshot", "supports_function_calling": true, "supports_tool_choice": true, @@ -17578,8 +18445,8 @@ "max_tokens": 16000, "max_input_tokens": 16000, "max_output_tokens": 16000, - "input_cost_per_token": 8e-07, - "output_cost_per_token": 1.2e-06, + "input_cost_per_token": 8e-7, + "output_cost_per_token": 0.0000012, "litellm_provider": "morph", "mode": "chat", "supports_function_calling": false, @@ -17592,8 +18459,8 @@ "max_tokens": 16000, "max_input_tokens": 16000, "max_output_tokens": 16000, - "input_cost_per_token": 9e-07, - "output_cost_per_token": 1.9e-06, + "input_cost_per_token": 9e-7, + "output_cost_per_token": 0.0000019, "litellm_provider": "morph", "mode": 
"chat", "supports_function_calling": false, @@ -17601,5 +18468,125 @@ "supports_vision": false, "supports_system_messages": true, "supports_tool_choice": false + }, + "oci/meta.llama-4-maverick-17b-128e-instruct-fp8": { + "max_tokens": 512000, + "max_input_tokens": 512000, + "max_output_tokens": 4000, + "input_cost_per_token": 7.2e-7, + "output_cost_per_token": 7.2e-7, + "litellm_provider": "oci", + "mode": "chat", + "supports_function_calling": true, + "supports_response_schema": false, + "source": "https://www.oracle.com/artificial-intelligence/generative-ai/generative-ai-service/pricing" + }, + "oci/meta.llama-4-scout-17b-16e-instruct": { + "max_tokens": 192000, + "max_input_tokens": 192000, + "max_output_tokens": 4000, + "input_cost_per_token": 7.2e-7, + "output_cost_per_token": 7.2e-7, + "litellm_provider": "oci", + "mode": "chat", + "supports_function_calling": true, + "supports_response_schema": false, + "source": "https://www.oracle.com/artificial-intelligence/generative-ai/generative-ai-service/pricing" + }, + "oci/meta.llama-3.3-70b-instruct": { + "max_tokens": 128000, + "max_input_tokens": 128000, + "max_output_tokens": 4000, + "input_cost_per_token": 7.2e-7, + "output_cost_per_token": 7.2e-7, + "litellm_provider": "oci", + "mode": "chat", + "supports_function_calling": true, + "supports_response_schema": false, + "source": "https://www.oracle.com/artificial-intelligence/generative-ai/generative-ai-service/pricing" + }, + "oci/meta.llama-3.2-90b-vision-instruct": { + "max_tokens": 128000, + "max_input_tokens": 128000, + "max_output_tokens": 4000, + "input_cost_per_token": 0.000002, + "output_cost_per_token": 0.000002, + "litellm_provider": "oci", + "mode": "chat", + "supports_function_calling": true, + "supports_response_schema": false, + "source": "https://www.oracle.com/artificial-intelligence/generative-ai/generative-ai-service/pricing" + }, + "oci/meta.llama-3.1-405b-instruct": { + "max_tokens": 128000, + "max_input_tokens": 128000, + 
"max_output_tokens": 4000, + "input_cost_per_token": 0.00001068, + "output_cost_per_token": 0.00001068, + "litellm_provider": "oci", + "mode": "chat", + "supports_function_calling": true, + "supports_response_schema": false, + "source": "https://www.oracle.com/artificial-intelligence/generative-ai/generative-ai-service/pricing" + }, + "oci/xai.grok-4": { + "max_tokens": 128000, + "max_input_tokens": 128000, + "max_output_tokens": 128000, + "input_cost_per_token": 0.000003, + "output_cost_per_token": 1.5e-7, + "litellm_provider": "oci", + "mode": "chat", + "supports_function_calling": true, + "supports_response_schema": false, + "source": "https://www.oracle.com/artificial-intelligence/generative-ai/generative-ai-service/pricing" + }, + "oci/xai.grok-3": { + "max_tokens": 131072, + "max_input_tokens": 131072, + "max_output_tokens": 131072, + "input_cost_per_token": 0.000003, + "output_cost_per_token": 1.5e-7, + "litellm_provider": "oci", + "mode": "chat", + "supports_function_calling": true, + "supports_response_schema": false, + "source": "https://www.oracle.com/artificial-intelligence/generative-ai/generative-ai-service/pricing" + }, + "oci/xai.grok-3-mini": { + "max_tokens": 131072, + "max_input_tokens": 131072, + "max_output_tokens": 131072, + "input_cost_per_token": 3e-7, + "output_cost_per_token": 5e-7, + "litellm_provider": "oci", + "mode": "chat", + "supports_function_calling": true, + "supports_response_schema": false, + "source": "https://www.oracle.com/artificial-intelligence/generative-ai/generative-ai-service/pricing" + }, + "oci/xai.grok-3-fast": { + "max_tokens": 131072, + "max_input_tokens": 131072, + "max_output_tokens": 131072, + "input_cost_per_token": 0.000005, + "output_cost_per_token": 0.000025, + "litellm_provider": "oci", + "mode": "chat", + "supports_function_calling": true, + "supports_response_schema": false, + "source": "https://www.oracle.com/artificial-intelligence/generative-ai/generative-ai-service/pricing" + }, + 
"oci/xai.grok-3-mini-fast": { + "max_tokens": 131072, + "max_input_tokens": 131072, + "max_output_tokens": 131072, + "input_cost_per_token": 6e-7, + "output_cost_per_token": 0.000004, + "litellm_provider": "oci", + "mode": "chat", + "supports_function_calling": true, + "supports_response_schema": false, + "source": "https://www.oracle.com/artificial-intelligence/generative-ai/generative-ai-service/pricing" } -} +} \ No newline at end of file diff --git a/scripts/test-window-remaining.js b/scripts/test-window-remaining.js new file mode 100644 index 00000000..79551eea --- /dev/null +++ b/scripts/test-window-remaining.js @@ -0,0 +1,78 @@ +const axios = require('axios') + +const BASE_URL = 'http://localhost:3312' + +// 你需要替换为一个有效的 API Key +const API_KEY = 'cr_your_api_key_here' + +async function testWindowRemaining() { + try { + console.log('🔍 测试时间窗口剩余时间功能...\n') + + // 第一步:获取 API Key ID + console.log('1. 获取 API Key ID...') + const idResponse = await axios.post(`${BASE_URL}/api-stats/api/get-key-id`, { + apiKey: API_KEY + }) + + if (!idResponse.data.success) { + throw new Error('Failed to get API Key ID') + } + + const apiId = idResponse.data.data.id + console.log(` ✅ API Key ID: ${apiId}\n`) + + // 第二步:查询统计数据 + console.log('2. 查询统计数据(包含时间窗口信息)...') + const statsResponse = await axios.post(`${BASE_URL}/api-stats/api/user-stats`, { + apiId + }) + + if (!statsResponse.data.success) { + throw new Error('Failed to get stats data') + } + + const stats = statsResponse.data.data + console.log(` ✅ 成功获取统计数据\n`) + + // 第三步:检查时间窗口信息 + console.log('3. 
时间窗口信息:') + console.log(` - 窗口时长: ${stats.limits.rateLimitWindow} 分钟`) + console.log(` - 请求限制: ${stats.limits.rateLimitRequests || '无限制'}`) + console.log(` - Token限制: ${stats.limits.tokenLimit || '无限制'}`) + console.log(` - 当前请求数: ${stats.limits.currentWindowRequests}`) + console.log(` - 当前Token数: ${stats.limits.currentWindowTokens}`) + + if (stats.limits.windowStartTime) { + const startTime = new Date(stats.limits.windowStartTime) + const endTime = new Date(stats.limits.windowEndTime) + + console.log(`\n ⏰ 时间窗口状态:`) + console.log(` - 窗口开始时间: ${startTime.toLocaleString()}`) + console.log(` - 窗口结束时间: ${endTime.toLocaleString()}`) + console.log(` - 剩余时间: ${stats.limits.windowRemainingSeconds} 秒`) + + if (stats.limits.windowRemainingSeconds > 0) { + const minutes = Math.floor(stats.limits.windowRemainingSeconds / 60) + const seconds = stats.limits.windowRemainingSeconds % 60 + console.log(` - 格式化剩余时间: ${minutes}分${seconds}秒`) + console.log(` - 窗口状态: 🟢 活跃中`) + } else { + console.log(` - 窗口状态: 🔴 已过期(下次请求时重置)`) + } + } else { + console.log(`\n ⏰ 时间窗口状态: ⚪ 未启动(还没有任何请求)`) + } + + console.log('\n✅ 测试完成!时间窗口剩余时间功能正常工作。') + } catch (error) { + console.error('❌ 测试失败:', error.message) + if (error.response) { + console.error(' 响应数据:', error.response.data) + } + process.exit(1) + } +} + +// 运行测试 +testWindowRemaining() diff --git a/src/models/redis.js b/src/models/redis.js index d1d2757b..554638c5 100644 --- a/src/models/redis.js +++ b/src/models/redis.js @@ -836,7 +836,7 @@ class RedisClient { for (const key of keys) { const accountData = await this.client.hgetall(key) if (accountData && Object.keys(accountData).length > 0) { - accounts.push({ id: key.replace('claude:account:', ''), ...accountData }) + accounts.push({ id: key.replace('openai:account:', ''), ...accountData }) } } return accounts diff --git a/src/routes/admin.js b/src/routes/admin.js index ac11ab29..3a53d065 100644 --- a/src/routes/admin.js +++ b/src/routes/admin.js @@ -4,6 +4,7 @@ const claudeAccountService = 
require('../services/claudeAccountService') const claudeConsoleAccountService = require('../services/claudeConsoleAccountService') const bedrockAccountService = require('../services/bedrockAccountService') const geminiAccountService = require('../services/geminiAccountService') +const openaiAccountService = require('../services/openaiAccountService') const accountGroupService = require('../services/accountGroupService') const redis = require('../models/redis') const { authenticateAdmin } = require('../middleware/auth') @@ -388,6 +389,7 @@ router.post('/api-keys', authenticateAdmin, async (req, res) => { claudeAccountId, claudeConsoleAccountId, geminiAccountId, + openaiAccountId, permissions, concurrencyLimit, rateLimitWindow, @@ -483,6 +485,7 @@ router.post('/api-keys', authenticateAdmin, async (req, res) => { claudeAccountId, claudeConsoleAccountId, geminiAccountId, + openaiAccountId, permissions, concurrencyLimit, rateLimitWindow, @@ -515,6 +518,7 @@ router.post('/api-keys/batch', authenticateAdmin, async (req, res) => { claudeAccountId, claudeConsoleAccountId, geminiAccountId, + openaiAccountId, permissions, concurrencyLimit, rateLimitWindow, @@ -557,6 +561,7 @@ router.post('/api-keys/batch', authenticateAdmin, async (req, res) => { claudeAccountId, claudeConsoleAccountId, geminiAccountId, + openaiAccountId, permissions, concurrencyLimit, rateLimitWindow, @@ -626,6 +631,7 @@ router.put('/api-keys/:keyId', authenticateAdmin, async (req, res) => { claudeAccountId, claudeConsoleAccountId, geminiAccountId, + openaiAccountId, permissions, enableModelRestriction, restrictedModels, @@ -684,12 +690,17 @@ router.put('/api-keys/:keyId', authenticateAdmin, async (req, res) => { updates.geminiAccountId = geminiAccountId || '' } + if (openaiAccountId !== undefined) { + // 空字符串表示解绑,null或空字符串都设置为空字符串 + updates.openaiAccountId = openaiAccountId || '' + } + if (permissions !== undefined) { // 验证权限值 - if (!['claude', 'gemini', 'all'].includes(permissions)) { + if (!['claude', 
'gemini', 'openai', 'all'].includes(permissions)) { return res .status(400) - .json({ error: 'Invalid permissions value. Must be claude, gemini, or all' }) + .json({ error: 'Invalid permissions value. Must be claude, gemini, openai, or all' }) } updates.permissions = permissions } @@ -894,6 +905,11 @@ router.get('/account-groups/:groupId/members', authenticateAdmin, async (req, re account = await geminiAccountService.getAccount(memberId) } + // 如果还找不到,尝试OpenAI账户 + if (!account) { + account = await openaiAccountService.getAccount(memberId) + } + if (account) { members.push(account) } @@ -2396,6 +2412,7 @@ router.get('/dashboard', authenticateAdmin, async (req, res) => { claudeConsoleAccounts, geminiAccounts, bedrockAccountsResult, + openaiAccounts, todayStats, systemAverages, realtimeMetrics @@ -2406,6 +2423,7 @@ router.get('/dashboard', authenticateAdmin, async (req, res) => { claudeConsoleAccountService.getAllAccounts(), geminiAccountService.getAllAccounts(), bedrockAccountService.getAllAccounts(), + redis.getAllOpenAIAccounts(), redis.getTodayStats(), redis.getSystemAverages(), redis.getRealtimeSystemMetrics() @@ -2543,6 +2561,39 @@ router.get('/dashboard', authenticateAdmin, async (req, res) => { (acc) => acc.rateLimitStatus && acc.rateLimitStatus.isRateLimited ).length + // OpenAI账户统计 + // 注意:OpenAI账户的isActive和schedulable是字符串类型,默认值为'true' + const normalOpenAIAccounts = openaiAccounts.filter( + (acc) => + (acc.isActive === 'true' || + acc.isActive === true || + (!acc.isActive && acc.isActive !== 'false' && acc.isActive !== false)) && + acc.status !== 'blocked' && + acc.status !== 'unauthorized' && + acc.schedulable !== 'false' && + acc.schedulable !== false && // 包括'true'、true和undefined + !(acc.rateLimitStatus && acc.rateLimitStatus.isRateLimited) + ).length + const abnormalOpenAIAccounts = openaiAccounts.filter( + (acc) => + acc.isActive === 'false' || + acc.isActive === false || + acc.status === 'blocked' || + acc.status === 'unauthorized' + ).length + const 
pausedOpenAIAccounts = openaiAccounts.filter( + (acc) => + (acc.schedulable === 'false' || acc.schedulable === false) && + (acc.isActive === 'true' || + acc.isActive === true || + (!acc.isActive && acc.isActive !== 'false' && acc.isActive !== false)) && + acc.status !== 'blocked' && + acc.status !== 'unauthorized' + ).length + const rateLimitedOpenAIAccounts = openaiAccounts.filter( + (acc) => acc.rateLimitStatus && acc.rateLimitStatus.isRateLimited + ).length + const dashboard = { overview: { totalApiKeys: apiKeys.length, @@ -2552,27 +2603,32 @@ router.get('/dashboard', authenticateAdmin, async (req, res) => { claudeAccounts.length + claudeConsoleAccounts.length + geminiAccounts.length + - bedrockAccounts.length, + bedrockAccounts.length + + openaiAccounts.length, normalAccounts: normalClaudeAccounts + normalClaudeConsoleAccounts + normalGeminiAccounts + - normalBedrockAccounts, + normalBedrockAccounts + + normalOpenAIAccounts, abnormalAccounts: abnormalClaudeAccounts + abnormalClaudeConsoleAccounts + abnormalGeminiAccounts + - abnormalBedrockAccounts, + abnormalBedrockAccounts + + abnormalOpenAIAccounts, pausedAccounts: pausedClaudeAccounts + pausedClaudeConsoleAccounts + pausedGeminiAccounts + - pausedBedrockAccounts, + pausedBedrockAccounts + + pausedOpenAIAccounts, rateLimitedAccounts: rateLimitedClaudeAccounts + rateLimitedClaudeConsoleAccounts + rateLimitedGeminiAccounts + - rateLimitedBedrockAccounts, + rateLimitedBedrockAccounts + + rateLimitedOpenAIAccounts, // 各平台详细统计 accountsByPlatform: { claude: { @@ -2602,6 +2658,13 @@ router.get('/dashboard', authenticateAdmin, async (req, res) => { abnormal: abnormalBedrockAccounts, paused: pausedBedrockAccounts, rateLimited: rateLimitedBedrockAccounts + }, + openai: { + total: openaiAccounts.length, + normal: normalOpenAIAccounts, + abnormal: abnormalOpenAIAccounts, + paused: pausedOpenAIAccounts, + rateLimited: rateLimitedOpenAIAccounts } }, // 保留旧字段以兼容 @@ -2609,7 +2672,8 @@ router.get('/dashboard', 
authenticateAdmin, async (req, res) => { normalClaudeAccounts + normalClaudeConsoleAccounts + normalGeminiAccounts + - normalBedrockAccounts, + normalBedrockAccounts + + normalOpenAIAccounts, totalClaudeAccounts: claudeAccounts.length + claudeConsoleAccounts.length, activeClaudeAccounts: normalClaudeAccounts + normalClaudeConsoleAccounts, rateLimitedClaudeAccounts: rateLimitedClaudeAccounts + rateLimitedClaudeConsoleAccounts, @@ -4513,7 +4577,7 @@ router.post('/openai-accounts/exchange-code', authenticateAdmin, async (req, res // 获取所有 OpenAI 账户 router.get('/openai-accounts', authenticateAdmin, async (req, res) => { try { - const accounts = await redis.getAllOpenAIAccounts() + const accounts = await openaiAccountService.getAllAccounts() logger.info(`获取 OpenAI 账户列表: ${accounts.length} 个账户`) @@ -4553,60 +4617,41 @@ router.post('/openai-accounts', authenticateAdmin, async (req, res) => { message: '账户名称不能为空' }) } - const id = uuidv4() // 创建账户数据 const accountData = { - id, name, description: description || '', - platform: 'openai', accountType: accountType || 'shared', - groupId: groupId || null, - dedicatedApiKeys: dedicatedApiKeys || [], priority: priority || 50, rateLimitDuration: rateLimitDuration || 60, - enabled: true, - idToken: claudeAccountService._encryptSensitiveData(openaiOauth.idToken), - accessToken: claudeAccountService._encryptSensitiveData(openaiOauth.accessToken), - refreshToken: claudeAccountService._encryptSensitiveData(openaiOauth.refreshToken), - accountId: accountInfo?.accountId || '', - expiresAt: (Math.floor(Date.now() / 1000) + openaiOauth.expires_in) * 1000, - chatgptUserId: accountInfo?.chatgptUserId || '', - organizationId: accountInfo?.organizationId || '', - organizationRole: accountInfo?.organizationRole || '', - organizationTitle: accountInfo?.organizationTitle || '', - planType: accountInfo?.planType || '', - email: claudeAccountService._encryptSensitiveData(accountInfo?.email || ''), - emailVerified: accountInfo?.emailVerified || false, 
+ openaiOauth: openaiOauth || {}, + accountInfo: accountInfo || {}, + proxy: proxy?.enabled + ? { + type: proxy.type, + host: proxy.host, + port: proxy.port, + username: proxy.username || null, + password: proxy.password || null + } + : null, isActive: true, - status: 'active', - lastRefresh: new Date().toISOString(), - createdAt: new Date().toISOString(), - updatedAt: new Date().toISOString() + schedulable: true } - // 存储代理配置(如果提供) - if (proxy?.enabled) { - accountData.proxy = { - type: proxy.type, - host: proxy.host, - port: proxy.port, - username: proxy.username || null, - password: proxy.password || null - } + // 创建账户 + const createdAccount = await openaiAccountService.createAccount(accountData) + + // 如果是分组类型,添加到分组 + if (accountType === 'group' && groupId) { + await accountGroupService.addAccountToGroup(createdAccount.id, groupId, 'openai') } - // 保存到 Redis - const accountId = await redis.setOpenAiAccount(id, accountData) - - logger.success(`✅ 创建 OpenAI 账户成功: ${name} (ID: ${accountId})`) + logger.success(`✅ 创建 OpenAI 账户成功: ${name} (ID: ${createdAccount.id})`) return res.json({ success: true, - data: { - id: accountId, - ...accountData - } + data: createdAccount }) } catch (error) { logger.error('创建 OpenAI 账户失败:', error) @@ -4619,19 +4664,100 @@ router.post('/openai-accounts', authenticateAdmin, async (req, res) => { }) // 更新 OpenAI 账户 -router.put('/openai-accounts/:id', authenticateAdmin, async (req, res) => - //TODO: - res.json({ - success: true - }) -) +router.put('/openai-accounts/:id', authenticateAdmin, async (req, res) => { + try { + const { id } = req.params + const updates = req.body + + // 验证accountType的有效性 + if (updates.accountType && !['shared', 'dedicated', 'group'].includes(updates.accountType)) { + return res + .status(400) + .json({ error: 'Invalid account type. 
Must be "shared", "dedicated" or "group"' }) + } + + // 如果更新为分组类型,验证groupId + if (updates.accountType === 'group' && !updates.groupId) { + return res.status(400).json({ error: 'Group ID is required for group type accounts' }) + } + + // 获取账户当前信息以处理分组变更 + const currentAccount = await openaiAccountService.getAccount(id) + if (!currentAccount) { + return res.status(404).json({ error: 'Account not found' }) + } + + // 处理分组的变更 + if (updates.accountType !== undefined) { + // 如果之前是分组类型,需要从原分组中移除 + if (currentAccount.accountType === 'group') { + const oldGroup = await accountGroupService.getAccountGroup(id) + if (oldGroup) { + await accountGroupService.removeAccountFromGroup(id, oldGroup.id) + } + } + // 如果新类型是分组,添加到新分组 + if (updates.accountType === 'group' && updates.groupId) { + await accountGroupService.addAccountToGroup(id, updates.groupId, 'openai') + } + } + + // 准备更新数据 + const updateData = { ...updates } + + // 处理敏感数据加密 + if (updates.openaiOauth) { + updateData.openaiOauth = updates.openaiOauth + if (updates.openaiOauth.idToken) { + updateData.idToken = updates.openaiOauth.idToken + } + if (updates.openaiOauth.accessToken) { + updateData.accessToken = updates.openaiOauth.accessToken + } + if (updates.openaiOauth.refreshToken) { + updateData.refreshToken = updates.openaiOauth.refreshToken + } + if (updates.openaiOauth.expires_in) { + updateData.expiresAt = new Date( + Date.now() + updates.openaiOauth.expires_in * 1000 + ).toISOString() + } + } + + // 更新账户信息 + if (updates.accountInfo) { + updateData.accountId = updates.accountInfo.accountId || currentAccount.accountId + updateData.chatgptUserId = updates.accountInfo.chatgptUserId || currentAccount.chatgptUserId + updateData.organizationId = + updates.accountInfo.organizationId || currentAccount.organizationId + updateData.organizationRole = + updates.accountInfo.organizationRole || currentAccount.organizationRole + updateData.organizationTitle = + updates.accountInfo.organizationTitle || 
currentAccount.organizationTitle + updateData.planType = updates.accountInfo.planType || currentAccount.planType + updateData.email = updates.accountInfo.email || currentAccount.email + updateData.emailVerified = + updates.accountInfo.emailVerified !== undefined + ? updates.accountInfo.emailVerified + : currentAccount.emailVerified + } + + const updatedAccount = await openaiAccountService.updateAccount(id, updateData) + + logger.success(`📝 Admin updated OpenAI account: ${id}`) + return res.json({ success: true, data: updatedAccount }) + } catch (error) { + logger.error('❌ Failed to update OpenAI account:', error) + return res.status(500).json({ error: 'Failed to update account', message: error.message }) + } +}) // 删除 OpenAI 账户 router.delete('/openai-accounts/:id', authenticateAdmin, async (req, res) => { try { const { id } = req.params - const account = await redis.getOpenAiAccount(id) + const account = await openaiAccountService.getAccount(id) if (!account) { return res.status(404).json({ success: false, @@ -4639,7 +4765,15 @@ router.delete('/openai-accounts/:id', authenticateAdmin, async (req, res) => { }) } - await redis.deleteOpenAiAccount(id) + // 如果账户在分组中,从分组中移除 + if (account.accountType === 'group') { + const group = await accountGroupService.getAccountGroup(id) + if (group) { + await accountGroupService.removeAccountFromGroup(id, group.id) + } + } + + await openaiAccountService.deleteAccount(id) logger.success(`✅ 删除 OpenAI 账户成功: ${account.name} (ID: ${id})`) @@ -4695,4 +4829,30 @@ router.put('/openai-accounts/:id/toggle', authenticateAdmin, async (req, res) => } }) +// 切换 OpenAI 账户调度状态 +router.put( + '/openai-accounts/:accountId/toggle-schedulable', + authenticateAdmin, + async (req, res) => { + try { + const { accountId } = req.params + + const result = await openaiAccountService.toggleSchedulable(accountId) + + return res.json({ + success: result.success, + schedulable: result.schedulable, + message: result.schedulable ? 
'已启用调度' : '已禁用调度' + }) + } catch (error) { + logger.error('切换 OpenAI 账户调度状态失败:', error) + return res.status(500).json({ + success: false, + message: '切换调度状态失败', + error: error.message + }) + } + } +) + module.exports = router diff --git a/src/routes/apiStats.js b/src/routes/apiStats.js index 31e47c7d..2b8eca6f 100644 --- a/src/routes/apiStats.js +++ b/src/routes/apiStats.js @@ -279,6 +279,9 @@ router.post('/api/user-stats', async (req, res) => { let currentWindowRequests = 0 let currentWindowTokens = 0 let currentDailyCost = 0 + let windowStartTime = null + let windowEndTime = null + let windowRemainingSeconds = null try { // 获取当前时间窗口的请求次数和Token使用量 @@ -286,9 +289,32 @@ router.post('/api/user-stats', async (req, res) => { const client = redis.getClientSafe() const requestCountKey = `rate_limit:requests:${keyId}` const tokenCountKey = `rate_limit:tokens:${keyId}` + const windowStartKey = `rate_limit:window_start:${keyId}` currentWindowRequests = parseInt((await client.get(requestCountKey)) || '0') currentWindowTokens = parseInt((await client.get(tokenCountKey)) || '0') + + // 获取窗口开始时间和计算剩余时间 + const windowStart = await client.get(windowStartKey) + if (windowStart) { + const now = Date.now() + windowStartTime = parseInt(windowStart) + const windowDuration = fullKeyData.rateLimitWindow * 60 * 1000 // 转换为毫秒 + windowEndTime = windowStartTime + windowDuration + + // 如果窗口还有效 + if (now < windowEndTime) { + windowRemainingSeconds = Math.max(0, Math.floor((windowEndTime - now) / 1000)) + } else { + // 窗口已过期,下次请求会重置 + windowStartTime = null + windowEndTime = null + windowRemainingSeconds = 0 + // 重置计数为0,因为窗口已过期 + currentWindowRequests = 0 + currentWindowTokens = 0 + } + } } // 获取当日费用 @@ -334,7 +360,11 @@ router.post('/api/user-stats', async (req, res) => { // 当前使用量 currentWindowRequests, currentWindowTokens, - currentDailyCost + currentDailyCost, + // 时间窗口信息 + windowStartTime, + windowEndTime, + windowRemainingSeconds }, // 绑定的账户信息(只显示ID,不显示敏感信息) diff --git 
a/src/routes/openaiRoutes.js b/src/routes/openaiRoutes.js index 3751121f..13a2da9d 100644 --- a/src/routes/openaiRoutes.js +++ b/src/routes/openaiRoutes.js @@ -3,33 +3,51 @@ const axios = require('axios') const router = express.Router() const logger = require('../utils/logger') const { authenticateApiKey } = require('../middleware/auth') -const redis = require('../models/redis') const claudeAccountService = require('../services/claudeAccountService') +const unifiedOpenAIScheduler = require('../services/unifiedOpenAIScheduler') +const openaiAccountService = require('../services/openaiAccountService') +const apiKeyService = require('../services/apiKeyService') +const crypto = require('crypto') -// 选择一个可用的 OpenAI 账户,并返回解密后的 accessToken -async function getOpenAIAuthToken() { +// 使用统一调度器选择 OpenAI 账户 +async function getOpenAIAuthToken(apiKeyData, sessionId = null, requestedModel = null) { try { - const accounts = await redis.getAllOpenAIAccounts() - if (!accounts || accounts.length === 0) { - throw new Error('No OpenAI accounts found in Redis') + // 生成会话哈希(如果有会话ID) + const sessionHash = sessionId + ? 
crypto.createHash('sha256').update(sessionId).digest('hex') + : null + + // 使用统一调度器选择账户 + const result = await unifiedOpenAIScheduler.selectAccountForApiKey( + apiKeyData, + sessionHash, + requestedModel + ) + + if (!result || !result.accountId) { + throw new Error('No available OpenAI account found') } - // 简单选择策略:选择第一个启用并活跃的账户 - const candidate = - accounts.find((a) => String(a.enabled) === 'true' && String(a.isActive) === 'true') || - accounts[0] - - if (!candidate || !candidate.accessToken) { - throw new Error('No valid OpenAI account with accessToken') + // 获取账户详情 + const account = await openaiAccountService.getAccount(result.accountId) + if (!account || !account.accessToken) { + throw new Error(`OpenAI account ${result.accountId} has no valid accessToken`) } - const accessToken = claudeAccountService._decryptSensitiveData(candidate.accessToken) + // 解密 accessToken + const accessToken = claudeAccountService._decryptSensitiveData(account.accessToken) if (!accessToken) { throw new Error('Failed to decrypt OpenAI accessToken') } - return { accessToken, accountId: candidate.accountId || 'unknown' } + + logger.info(`Selected OpenAI account: ${account.name} (${result.accountId})`) + return { + accessToken, + accountId: result.accountId, + accountName: account.name + } } catch (error) { - logger.error('Failed to get OpenAI auth token from Redis:', error) + logger.error('Failed to get OpenAI auth token:', error) throw error } } @@ -37,7 +55,27 @@ async function getOpenAIAuthToken() { router.post('/responses', authenticateApiKey, async (req, res) => { let upstream = null try { - const { accessToken, accountId } = await getOpenAIAuthToken() + // 从中间件获取 API Key 数据 + const apiKeyData = req.apiKeyData || {} + + // 从请求头或请求体中提取会话 ID + const sessionId = + req.headers['session_id'] || + req.headers['x-session-id'] || + req.body?.session_id || + req.body?.conversation_id || + null + + // 从请求体中提取模型和流式标志 + const requestedModel = req.body?.model || null + const isStream = 
req.body?.stream !== false // 默认为流式(兼容现有行为) + + // 使用调度器选择账户 + const { accessToken, accountId } = await getOpenAIAuthToken( + apiKeyData, + sessionId, + requestedModel + ) // 基于白名单构造上游所需的请求头,确保键为小写且值受控 const incoming = req.headers || {} @@ -54,21 +92,39 @@ router.post('/responses', authenticateApiKey, async (req, res) => { headers['authorization'] = `Bearer ${accessToken}` headers['chatgpt-account-id'] = accountId headers['host'] = 'chatgpt.com' - headers['accept'] = 'text/event-stream' + headers['accept'] = isStream ? 'text/event-stream' : 'application/json' headers['content-type'] = 'application/json' req.body['store'] = false - // 使用流式转发,保持与上游一致 - upstream = await axios.post('https://chatgpt.com/backend-api/codex/responses', req.body, { - headers, - responseType: 'stream', - timeout: 60000, - validateStatus: () => true - }) + + // 根据 stream 参数决定请求类型 + if (isStream) { + // 流式请求 + upstream = await axios.post('https://chatgpt.com/backend-api/codex/responses', req.body, { + headers, + responseType: 'stream', + timeout: 60000, + validateStatus: () => true + }) + } else { + // 非流式请求 + upstream = await axios.post('https://chatgpt.com/backend-api/codex/responses', req.body, { + headers, + timeout: 60000, + validateStatus: () => true + }) + } res.status(upstream.status) - res.setHeader('Content-Type', 'text/event-stream') - res.setHeader('Cache-Control', 'no-cache') - res.setHeader('Connection', 'keep-alive') - res.setHeader('X-Accel-Buffering', 'no') + + if (isStream) { + // 流式响应头 + res.setHeader('Content-Type', 'text/event-stream') + res.setHeader('Cache-Control', 'no-cache') + res.setHeader('Connection', 'keep-alive') + res.setHeader('X-Accel-Buffering', 'no') + } else { + // 非流式响应头 + res.setHeader('Content-Type', 'application/json') + } // 透传关键诊断头,避免传递不安全或与传输相关的头 const passThroughHeaderKeys = ['openai-version', 'x-request-id', 'openai-processing-ms'] @@ -79,11 +135,170 @@ router.post('/responses', authenticateApiKey, async (req, res) => { } } - // 立即刷新响应头,开始 SSE - if 
(typeof res.flushHeaders === 'function') { - res.flushHeaders() + if (isStream) { + // 立即刷新响应头,开始 SSE + if (typeof res.flushHeaders === 'function') { + res.flushHeaders() + } } + // 处理响应并捕获 usage 数据和真实的 model + let buffer = '' + let usageData = null + let actualModel = null + let usageReported = false + + if (!isStream) { + // 非流式响应处理 + try { + logger.info(`📄 Processing OpenAI non-stream response for model: ${requestedModel}`) + + // 直接获取完整响应 + const responseData = upstream.data + + // 从响应中获取实际的 model 和 usage + actualModel = responseData.model || requestedModel || 'gpt-4' + usageData = responseData.usage + + logger.debug(`📊 Non-stream response - Model: ${actualModel}, Usage:`, usageData) + + // 记录使用统计 + if (usageData) { + const inputTokens = usageData.input_tokens || usageData.prompt_tokens || 0 + const outputTokens = usageData.output_tokens || usageData.completion_tokens || 0 + const cacheCreateTokens = usageData.input_tokens_details?.cache_creation_tokens || 0 + const cacheReadTokens = usageData.input_tokens_details?.cached_tokens || 0 + + await apiKeyService.recordUsage( + apiKeyData.id, + inputTokens, + outputTokens, + cacheCreateTokens, + cacheReadTokens, + actualModel, + accountId + ) + + logger.info( + `📊 Recorded OpenAI non-stream usage - Input: ${inputTokens}, Output: ${outputTokens}, Total: ${usageData.total_tokens || inputTokens + outputTokens}, Model: ${actualModel}` + ) + } + + // 返回响应 + res.json(responseData) + return + } catch (error) { + logger.error('Failed to process non-stream response:', error) + if (!res.headersSent) { + res.status(500).json({ error: { message: 'Failed to process response' } }) + } + return + } + } + + // 解析 SSE 事件以捕获 usage 数据和 model + const parseSSEForUsage = (data) => { + const lines = data.split('\n') + + for (const line of lines) { + if (line.startsWith('event: response.completed')) { + // 下一行应该是数据 + continue + } + + if (line.startsWith('data: ')) { + try { + const jsonStr = line.slice(6) // 移除 'data: ' 前缀 + const eventData 
= JSON.parse(jsonStr) + + // 检查是否是 response.completed 事件 + if (eventData.type === 'response.completed' && eventData.response) { + // 从响应中获取真实的 model + if (eventData.response.model) { + actualModel = eventData.response.model + logger.debug(`📊 Captured actual model: ${actualModel}`) + } + + // 获取 usage 数据 + if (eventData.response.usage) { + usageData = eventData.response.usage + logger.debug('📊 Captured OpenAI usage data:', usageData) + } + } + } catch (e) { + // 忽略解析错误 + } + } + } + } + + upstream.data.on('data', (chunk) => { + try { + const chunkStr = chunk.toString() + + // 转发数据给客户端 + if (!res.headersSent) { + res.write(chunk) + } + + // 同时解析数据以捕获 usage 信息 + buffer += chunkStr + + // 处理完整的 SSE 事件 + if (buffer.includes('\n\n')) { + const events = buffer.split('\n\n') + buffer = events.pop() || '' // 保留最后一个可能不完整的事件 + + for (const event of events) { + if (event.trim()) { + parseSSEForUsage(event) + } + } + } + } catch (error) { + logger.error('Error processing OpenAI stream chunk:', error) + } + }) + + upstream.data.on('end', async () => { + // 处理剩余的 buffer + if (buffer.trim()) { + parseSSEForUsage(buffer) + } + + // 记录使用统计 + if (!usageReported && usageData) { + try { + const inputTokens = usageData.input_tokens || 0 + const outputTokens = usageData.output_tokens || 0 + const cacheCreateTokens = usageData.input_tokens_details?.cache_creation_tokens || 0 + const cacheReadTokens = usageData.input_tokens_details?.cached_tokens || 0 + + // 使用响应中的真实 model,如果没有则使用请求中的 model,最后回退到默认值 + const modelToRecord = actualModel || requestedModel || 'gpt-4' + + await apiKeyService.recordUsage( + apiKeyData.id, + inputTokens, + outputTokens, + cacheCreateTokens, + cacheReadTokens, + modelToRecord, + accountId + ) + + logger.info( + `📊 Recorded OpenAI usage - Input: ${inputTokens}, Output: ${outputTokens}, Total: ${usageData.total_tokens || inputTokens + outputTokens}, Model: ${modelToRecord} (actual: ${actualModel}, requested: ${requestedModel})` + ) + usageReported = true + } catch 
(error) { + logger.error('Failed to record OpenAI usage:', error) + } + } + + res.end() + }) + upstream.data.on('error', (err) => { logger.error('Upstream stream error:', err) if (!res.headersSent) { @@ -93,8 +308,6 @@ router.post('/responses', authenticateApiKey, async (req, res) => { } }) - upstream.data.pipe(res) - // 客户端断开时清理上游流 const cleanup = () => { try { @@ -116,4 +329,65 @@ router.post('/responses', authenticateApiKey, async (req, res) => { } }) +// 使用情况统计端点 +router.get('/usage', authenticateApiKey, async (req, res) => { + try { + const { usage } = req.apiKey + + res.json({ + object: 'usage', + total_tokens: usage.total.tokens, + total_requests: usage.total.requests, + daily_tokens: usage.daily.tokens, + daily_requests: usage.daily.requests, + monthly_tokens: usage.monthly.tokens, + monthly_requests: usage.monthly.requests + }) + } catch (error) { + logger.error('Failed to get usage stats:', error) + res.status(500).json({ + error: { + message: 'Failed to retrieve usage statistics', + type: 'api_error' + } + }) + } +}) + +// API Key 信息端点 +router.get('/key-info', authenticateApiKey, async (req, res) => { + try { + const keyData = req.apiKey + res.json({ + id: keyData.id, + name: keyData.name, + description: keyData.description, + permissions: keyData.permissions || 'all', + token_limit: keyData.tokenLimit, + tokens_used: keyData.usage.total.tokens, + tokens_remaining: + keyData.tokenLimit > 0 + ? 
Math.max(0, keyData.tokenLimit - keyData.usage.total.tokens) + : null, + rate_limit: { + window: keyData.rateLimitWindow, + requests: keyData.rateLimitRequests + }, + usage: { + total: keyData.usage.total, + daily: keyData.usage.daily, + monthly: keyData.usage.monthly + } + }) + } catch (error) { + logger.error('Failed to get key info:', error) + res.status(500).json({ + error: { + message: 'Failed to retrieve API key information', + type: 'api_error' + } + }) + } +}) + module.exports = router diff --git a/src/services/accountGroupService.js b/src/services/accountGroupService.js index cfd0c50a..078ba5b6 100644 --- a/src/services/accountGroupService.js +++ b/src/services/accountGroupService.js @@ -27,8 +27,8 @@ class AccountGroupService { } // 验证平台类型 - if (!['claude', 'gemini'].includes(platform)) { - throw new Error('平台类型必须是 claude 或 gemini') + if (!['claude', 'gemini', 'openai'].includes(platform)) { + throw new Error('平台类型必须是 claude、gemini 或 openai') } const client = redis.getClientSafe() @@ -309,7 +309,9 @@ class AccountGroupService { const keyData = await client.hgetall(`api_key:${keyId}`) if ( keyData && - (keyData.claudeAccountId === groupKey || keyData.geminiAccountId === groupKey) + (keyData.claudeAccountId === groupKey || + keyData.geminiAccountId === groupKey || + keyData.openaiAccountId === groupKey) ) { boundApiKeys.push({ id: keyId, diff --git a/src/services/apiKeyService.js b/src/services/apiKeyService.js index 750990d4..4309e665 100644 --- a/src/services/apiKeyService.js +++ b/src/services/apiKeyService.js @@ -19,7 +19,8 @@ class ApiKeyService { claudeAccountId = null, claudeConsoleAccountId = null, geminiAccountId = null, - permissions = 'all', // 'claude', 'gemini', 'all' + openaiAccountId = null, + permissions = 'all', // 'claude', 'gemini', 'openai', 'all' isActive = true, concurrencyLimit = 0, rateLimitWindow = null, @@ -50,6 +51,7 @@ class ApiKeyService { claudeAccountId: claudeAccountId || '', claudeConsoleAccountId: claudeConsoleAccountId || 
'', geminiAccountId: geminiAccountId || '', + openaiAccountId: openaiAccountId || '', permissions: permissions || 'all', enableModelRestriction: String(enableModelRestriction), restrictedModels: JSON.stringify(restrictedModels || []), @@ -81,6 +83,7 @@ class ApiKeyService { claudeAccountId: keyData.claudeAccountId, claudeConsoleAccountId: keyData.claudeConsoleAccountId, geminiAccountId: keyData.geminiAccountId, + openaiAccountId: keyData.openaiAccountId, permissions: keyData.permissions, enableModelRestriction: keyData.enableModelRestriction === 'true', restrictedModels: JSON.parse(keyData.restrictedModels), @@ -167,6 +170,7 @@ class ApiKeyService { claudeAccountId: keyData.claudeAccountId, claudeConsoleAccountId: keyData.claudeConsoleAccountId, geminiAccountId: keyData.geminiAccountId, + openaiAccountId: keyData.openaiAccountId, permissions: keyData.permissions || 'all', tokenLimit: parseInt(keyData.tokenLimit), concurrencyLimit: parseInt(keyData.concurrencyLimit || 0), @@ -299,6 +303,7 @@ class ApiKeyService { 'claudeAccountId', 'claudeConsoleAccountId', 'geminiAccountId', + 'openaiAccountId', 'permissions', 'expiresAt', 'enableModelRestriction', diff --git a/src/services/openaiAccountService.js b/src/services/openaiAccountService.js new file mode 100644 index 00000000..6d242da1 --- /dev/null +++ b/src/services/openaiAccountService.js @@ -0,0 +1,583 @@ +const redisClient = require('../models/redis') +const { v4: uuidv4 } = require('uuid') +const crypto = require('crypto') +const config = require('../../config/config') +const logger = require('../utils/logger') +const { maskToken } = require('../utils/tokenMask') +const { + logRefreshStart, + logRefreshSuccess, + logRefreshError, + logTokenUsage, + logRefreshSkipped +} = require('../utils/tokenRefreshLogger') +const tokenRefreshService = require('./tokenRefreshService') + +// 加密相关常量 +const ALGORITHM = 'aes-256-cbc' +const ENCRYPTION_SALT = 'openai-account-salt' +const IV_LENGTH = 16 + +// 生成加密密钥(使用与 
// Derive the encryption key (original note: same approach as claudeAccountService).
// scrypt is deliberately expensive and this module encrypts/decrypts several fields
// per account read, so the derived key is memoized for the configured secret.
let cachedEncryptionSecret = null
let cachedEncryptionKey = null

/**
 * Return the 32-byte AES key derived from `config.security.encryptionKey`.
 * The key is re-derived only when the configured secret changes.
 * @returns {Buffer} derived key
 */
function generateEncryptionKey() {
  const secret = config.security.encryptionKey
  if (cachedEncryptionKey === null || cachedEncryptionSecret !== secret) {
    cachedEncryptionKey = crypto.scryptSync(secret, ENCRYPTION_SALT, 32)
    cachedEncryptionSecret = secret
  }
  return cachedEncryptionKey
}

// Redis key prefixes for OpenAI accounts
const OPENAI_ACCOUNT_KEY_PREFIX = 'openai:account:'
const SHARED_OPENAI_ACCOUNTS_KEY = 'shared_openai_accounts'
const ACCOUNT_SESSION_MAPPING_PREFIX = 'openai_session_account_mapping:'

/**
 * Encrypt a string with a fresh random IV.
 * @param {string} text - plaintext; falsy input yields ''
 * @returns {string} `<iv hex>:<ciphertext hex>`, or '' for empty input
 */
function encrypt(text) {
  if (!text) {
    return ''
  }
  const iv = crypto.randomBytes(IV_LENGTH)
  const cipher = crypto.createCipheriv(ALGORITHM, generateEncryptionKey(), iv)
  const encrypted = Buffer.concat([cipher.update(text), cipher.final()])
  return `${iv.toString('hex')}:${encrypted.toString('hex')}`
}

/**
 * Decrypt a value produced by `encrypt`.
 * @param {string} text - `<iv hex>:<ciphertext hex>`
 * @returns {string} plaintext, or '' when the input is empty or cannot be decrypted
 */
function decrypt(text) {
  if (!text) {
    return ''
  }
  try {
    // The IV is always 32 hex characters (16 bytes); position 32 is the ':' separator.
    const iv = Buffer.from(text.substring(0, 32), 'hex')
    const encryptedText = Buffer.from(text.substring(33), 'hex')
    const decipher = crypto.createDecipheriv(ALGORITHM, generateEncryptionKey(), iv)
    const decrypted = Buffer.concat([decipher.update(encryptedText), decipher.final()])
    return decrypted.toString()
  } catch (error) {
    // Best effort by design: callers treat '' as "no usable value".
    logger.error('Decryption error:', error)
    return ''
  }
}

/**
 * Exchange a refresh token for new tokens.
 * TODO: implement the OpenAI OAuth refresh flow; until then this always resolves to null.
 * @param {string} refreshToken - plaintext refresh token (currently unused)
 * @returns {Promise<null>}
 */
async function refreshAccessToken(refreshToken) {
  logger.warn('OpenAI token refresh not yet implemented')
  return null
}

/**
 * Tell whether an account's token has expired.
 * Accounts without `expiresAt` (or with an unparsable one) are treated as not expired.
 * @param {{expiresAt?: string}} account
 * @returns {boolean}
 */
function isTokenExpired(account) {
  if (!account.expiresAt) {
    return false
  }
  return new Date(account.expiresAt) <= new Date()
}

/**
 * Refresh the access token of one account and persist the result.
 * @param {string} accountId
 * @returns {Promise<object>} the token payload returned by `refreshAccessToken`
 * @throws {Error} when the account is missing, has no refresh token, or the refresh fails
 */
async function refreshAccountToken(accountId) {
  const account = await getAccount(accountId)
  if (!account) {
    throw new Error('Account not found')
  }

  const accountName = account.name || accountId
  logRefreshStart(accountId, accountName, 'openai')

  // getAccount() already returns the refresh token decrypted. Decrypting it a second
  // time fails and yields '', which made every refresh stop at "No refresh token".
  const refreshToken = account.refreshToken || null
  if (!refreshToken) {
    logRefreshSkipped(accountId, accountName, 'openai', 'No refresh token available')
    throw new Error('No refresh token available')
  }

  try {
    const newTokens = await refreshAccessToken(refreshToken)
    if (!newTokens) {
      throw new Error('Failed to refresh token')
    }

    // updateAccount() encrypts accessToken itself, so hand it the plaintext;
    // encrypting here as well would store a doubly-encrypted token.
    // NOTE(review): assumes the refresh payload carries access_token / expiry_date —
    // confirm once refreshAccessToken is implemented.
    await updateAccount(accountId, {
      accessToken: newTokens.access_token,
      expiresAt: new Date(newTokens.expiry_date).toISOString()
    })

    logRefreshSuccess(accountId, accountName, 'openai', newTokens.expiry_date)
    return newTokens
  } catch (error) {
    logRefreshError(accountId, accountName, 'openai', error.message)
    throw error
  }
}
JSON.parse(accountData.openaiOauth) + : accountData.openaiOauth + } + + // 处理账户信息 + const accountInfo = accountData.accountInfo || {} + + const account = { + id: accountId, + name: accountData.name, + description: accountData.description || '', + accountType: accountData.accountType || 'shared', + groupId: accountData.groupId || null, + priority: accountData.priority || 50, + rateLimitDuration: accountData.rateLimitDuration || 60, + // OAuth相关字段(加密存储) + idToken: encrypt(oauthData.idToken || ''), + accessToken: encrypt(oauthData.accessToken || ''), + refreshToken: encrypt(oauthData.refreshToken || ''), + openaiOauth: encrypt(JSON.stringify(oauthData)), + // 账户信息字段 + accountId: accountInfo.accountId || '', + chatgptUserId: accountInfo.chatgptUserId || '', + organizationId: accountInfo.organizationId || '', + organizationRole: accountInfo.organizationRole || '', + organizationTitle: accountInfo.organizationTitle || '', + planType: accountInfo.planType || '', + email: encrypt(accountInfo.email || ''), + emailVerified: accountInfo.emailVerified || false, + // 过期时间 + expiresAt: oauthData.expires_in + ? new Date(Date.now() + oauthData.expires_in * 1000).toISOString() + : new Date(Date.now() + 365 * 24 * 60 * 60 * 1000).toISOString(), // 默认1年 + // 状态字段 + isActive: accountData.isActive !== false ? 'true' : 'false', + status: 'active', + schedulable: accountData.schedulable !== false ? 'true' : 'false', + lastRefresh: now, + createdAt: now, + updatedAt: now + } + + // 代理配置 + if (accountData.proxy) { + account.proxy = + typeof accountData.proxy === 'string' ? 
// Hash fields that are stored encrypted and handed to callers as plaintext.
const ENCRYPTED_ACCOUNT_FIELDS = ['idToken', 'accessToken', 'refreshToken', 'email']

/**
 * Load one OpenAI account from Redis with its secrets decrypted.
 * Intended for internal use only; the result must not be sent to the frontend.
 * @param {string} accountId
 * @returns {Promise<object|null>} the account hash, or null when it does not exist
 */
async function getAccount(accountId) {
  const client = redisClient.getClientSafe()
  const accountData = await client.hgetall(`${OPENAI_ACCOUNT_KEY_PREFIX}${accountId}`)

  if (!accountData || Object.keys(accountData).length === 0) {
    return null
  }

  for (const field of ENCRYPTED_ACCOUNT_FIELDS) {
    if (accountData[field]) {
      accountData[field] = decrypt(accountData[field])
    }
  }

  // The OAuth payload is stored as encrypted JSON; unreadable payloads become null.
  if (accountData.openaiOauth) {
    try {
      accountData.openaiOauth = JSON.parse(decrypt(accountData.openaiOauth))
    } catch (e) {
      accountData.openaiOauth = null
    }
  }

  // The proxy configuration is stored as a JSON string.
  if (accountData.proxy && typeof accountData.proxy === 'string') {
    try {
      accountData.proxy = JSON.parse(accountData.proxy)
    } catch (e) {
      accountData.proxy = null
    }
  }

  return accountData
}

/**
 * Apply a partial update to an account.
 *
 * Secrets are encrypted before they are written. The caller's `updates` object is left
 * untouched, and the returned account has the same shape `getAccount` produces
 * (plaintext secrets, parsed `openaiOauth` / `proxy`) instead of leaking ciphertext
 * for the fields that were just updated.
 *
 * @param {string} accountId
 * @param {object} updates - fields to change, secrets given as plaintext
 * @returns {Promise<object>} the merged account
 * @throws {Error} when the account does not exist
 */
async function updateAccount(accountId, updates) {
  const existingAccount = await getAccount(accountId)
  if (!existingAccount) {
    throw new Error('Account not found')
  }

  // `plain` is what the caller gets back, `stored` is what is written to Redis.
  const plain = { ...updates, updatedAt: new Date().toISOString() }
  const stored = { ...plain }

  if (plain.openaiOauth) {
    const serialized =
      typeof plain.openaiOauth === 'string' ? plain.openaiOauth : JSON.stringify(plain.openaiOauth)
    stored.openaiOauth = encrypt(serialized)
  }
  for (const field of ENCRYPTED_ACCOUNT_FIELDS) {
    if (plain[field]) {
      stored[field] = encrypt(plain[field])
    }
  }

  if (plain.proxy) {
    stored.proxy = typeof plain.proxy === 'string' ? plain.proxy : JSON.stringify(plain.proxy)
  }

  // Keep the shared-account set in sync when the account type changes.
  const client = redisClient.getClientSafe()
  if (plain.accountType && plain.accountType !== existingAccount.accountType) {
    if (plain.accountType === 'shared') {
      await client.sadd(SHARED_OPENAI_ACCOUNTS_KEY, accountId)
    } else {
      await client.srem(SHARED_OPENAI_ACCOUNTS_KEY, accountId)
    }
  }

  await client.hset(`${OPENAI_ACCOUNT_KEY_PREFIX}${accountId}`, stored)

  logger.info(`Updated OpenAI account: ${accountId}`)

  const updatedAccount = { ...existingAccount, ...plain }

  // Mirror getAccount(): JSON-string inputs are returned parsed.
  if (updatedAccount.openaiOauth && typeof updatedAccount.openaiOauth === 'string') {
    try {
      updatedAccount.openaiOauth = JSON.parse(updatedAccount.openaiOauth)
    } catch (e) {
      updatedAccount.openaiOauth = null
    }
  }
  if (updatedAccount.proxy && typeof updatedAccount.proxy === 'string') {
    try {
      updatedAccount.proxy = JSON.parse(updatedAccount.proxy)
    } catch (e) {
      updatedAccount.proxy = null
    }
  }

  return updatedAccount
}
/**
 * List every OpenAI account in a form that is safe to return to the frontend.
 *
 * Secrets are never included: `accessToken`, `refreshToken` and `openaiOauth` are
 * replaced by the placeholder '[ENCRYPTED]' when a value is stored and by '' when it
 * is not, and `idToken` is dropped entirely. The presence flags are captured BEFORE
 * the secret fields are removed; checking afterwards always produced ''.
 *
 * @returns {Promise<object[]>} accounts with decrypted email, parsed proxy (password
 *   masked) and a `rateLimitStatus` object
 */
async function getAllAccounts() {
  const client = redisClient.getClientSafe()
  const keys = await client.keys(`${OPENAI_ACCOUNT_KEY_PREFIX}*`)
  const accounts = []

  for (const key of keys) {
    const accountData = await client.hgetall(key)
    if (!accountData || Object.keys(accountData).length === 0) {
      continue
    }

    // The email is stored encrypted but is shown in the admin UI.
    if (accountData.email) {
      accountData.email = decrypt(accountData.email)
    }

    // Remember which secrets exist, then strip them from the response.
    const hasOpenaiOauth = Boolean(accountData.openaiOauth)
    const hasAccessToken = Boolean(accountData.accessToken)
    const hasRefreshToken = Boolean(accountData.refreshToken)
    delete accountData.idToken
    delete accountData.accessToken
    delete accountData.refreshToken
    delete accountData.openaiOauth

    const rateLimitInfo = await getAccountRateLimitInfo(accountData.id)

    // The proxy is stored as JSON; mask its password, and fall back to null if unreadable.
    if (accountData.proxy) {
      try {
        accountData.proxy = JSON.parse(accountData.proxy)
        if (accountData.proxy && accountData.proxy.password) {
          accountData.proxy.password = '******'
        }
      } catch (e) {
        accountData.proxy = null
      }
    }

    accounts.push({
      ...accountData,
      openaiOauth: hasOpenaiOauth ? '[ENCRYPTED]' : '',
      accessToken: hasAccessToken ? '[ENCRYPTED]' : '',
      refreshToken: hasRefreshToken ? '[ENCRYPTED]' : '',
      // Rate-limit status in the unified shape used across account types
      rateLimitStatus: rateLimitInfo
        ? {
            isRateLimited: rateLimitInfo.isRateLimited,
            rateLimitedAt: rateLimitInfo.rateLimitedAt,
            minutesRemaining: rateLimitInfo.minutesRemaining
          }
        : {
            isRateLimited: false,
            rateLimitedAt: null,
            minutesRemaining: 0
          }
    })
  }

  return accounts
}
// Length of the rate-limit cool-down: one hour.
// NOTE(review): createAccount stores a per-account `rateLimitDuration`, but the window
// used here has always been fixed — confirm whether it should be honoured.
const RATE_LIMIT_WINDOW_MS = 60 * 60 * 1000

/**
 * Milliseconds left in an account's rate-limit window.
 * Returns 0 when the account is not marked 'limited', has no timestamp, the timestamp
 * cannot be parsed, or the window has already elapsed.
 * @param {{rateLimitStatus?: string, rateLimitedAt?: string}} account
 * @returns {number}
 */
function getRateLimitRemainingMs(account) {
  if (account.rateLimitStatus !== 'limited' || !account.rateLimitedAt) {
    return 0
  }
  const limitedAt = new Date(account.rateLimitedAt).getTime()
  if (Number.isNaN(limitedAt)) {
    return 0
  }
  return Math.max(0, limitedAt + RATE_LIMIT_WINDOW_MS - Date.now())
}

/**
 * Tell whether an already-loaded account is currently rate limited.
 * @param {object} account
 * @returns {boolean}
 */
function isRateLimited(account) {
  return getRateLimitRemainingMs(account) > 0
}

/**
 * Mark an account as rate limited (stamping the current time) or clear the mark.
 * @param {string} accountId
 * @param {boolean} isLimited
 * @returns {Promise<void>}
 */
async function setAccountRateLimited(accountId, isLimited) {
  const updates = {
    rateLimitStatus: isLimited ? 'limited' : 'normal',
    rateLimitedAt: isLimited ? new Date().toISOString() : null
  }

  await updateAccount(accountId, updates)
  logger.info(`Set rate limit status for OpenAI account ${accountId}: ${updates.rateLimitStatus}`)
}

/**
 * Flip an account's `schedulable` flag. Only the literal string 'false' counts as
 * "not schedulable"; any other stored value is switched to 'false'.
 * @param {string} accountId
 * @returns {Promise<{success: boolean, schedulable: boolean}>} the new state
 * @throws {Error} when the account does not exist
 */
async function toggleSchedulable(accountId) {
  const account = await getAccount(accountId)
  if (!account) {
    throw new Error('Account not found')
  }

  const newSchedulable = account.schedulable === 'false' ? 'true' : 'false'

  await updateAccount(accountId, {
    schedulable: newSchedulable
  })

  logger.info(`Toggled schedulable status for OpenAI account ${accountId}: ${newSchedulable}`)

  return {
    success: true,
    schedulable: newSchedulable === 'true'
  }
}

/**
 * Describe an account's rate-limit state.
 * @param {string} accountId
 * @returns {Promise<{isRateLimited: boolean, rateLimitedAt: (string|null), minutesRemaining: number}|null>}
 *   null when the account does not exist. While the account is marked 'limited' the
 *   original timestamp is reported even after the window has elapsed.
 */
async function getAccountRateLimitInfo(accountId) {
  const account = await getAccount(accountId)
  if (!account) {
    return null
  }

  if (account.rateLimitStatus === 'limited' && account.rateLimitedAt) {
    // Shares the window maths with isRateLimited(); an unparsable timestamp now
    // reports 0 minutes instead of NaN.
    const remainingTime = getRateLimitRemainingMs(account)

    return {
      isRateLimited: remainingTime > 0,
      rateLimitedAt: account.rateLimitedAt,
      minutesRemaining: Math.ceil(remainingTime / (60 * 1000))
    }
  }

  return {
    isRateLimited: false,
    rateLimitedAt: null,
    minutesRemaining: 0
  }
}

/**
 * Add `tokens` to an account's running usage total and stamp `lastUsedAt`.
 * Unknown accounts are ignored. `tokens` is coerced to a number so a numeric string
 * is added rather than concatenated; a non-numeric value counts as 0 instead of
 * poisoning the stored total with NaN.
 * @param {string} accountId
 * @param {number|string} tokens
 * @returns {Promise<void>}
 */
async function updateAccountUsage(accountId, tokens) {
  const account = await getAccount(accountId)
  if (!account) {
    return
  }

  const increment = Number(tokens)
  const previousTotal = Number.parseInt(account.totalUsage, 10) || 0
  const totalUsage = previousTotal + (Number.isFinite(increment) ? increment : 0)
  const lastUsedAt = new Date().toISOString()

  await updateAccount(accountId, {
    totalUsage: totalUsage.toString(),
    lastUsedAt
  })
}
/**
 * Picks which OpenAI account serves a request.
 *
 * Selection order in `selectAccountForApiKey`: the account or group bound to the API
 * key, then the sticky-session mapping, then the shared pool ordered by priority
 * (lower number first) and least-recently-used.
 */
class UnifiedOpenAIScheduler {
  constructor() {
    // Redis key prefix for sticky-session mappings (sessionHash -> { accountId, accountType })
    this.SESSION_MAPPING_PREFIX = 'unified_openai_session_mapping:'
  }

  // 🔧 Whether an account may be scheduled. Accepts both booleans and the strings
  // stored in Redis; a missing value means "schedulable".
  _isSchedulable(schedulable) {
    if (schedulable === undefined || schedulable === null) {
      return true
    }
    return schedulable !== false && schedulable !== 'false'
  }

  // 🔧 Parse a stored priority. Only a missing/unparsable value falls back to 50, so an
  // explicit priority of 0 (the highest) is respected.
  _parsePriority(rawPriority) {
    const parsed = Number.parseInt(rawPriority, 10)
    return Number.isNaN(parsed) ? 50 : parsed
  }

  // 🔧 Convert `lastUsedAt` to epoch milliseconds. Missing values, the '0' sentinel used
  // for never-used accounts and unparsable strings all map to 0; feeding '0' to
  // `new Date()` is implementation-defined and must not be relied on.
  _lastUsedTimestamp(lastUsedAt) {
    if (!lastUsedAt || lastUsedAt === '0') {
      return 0
    }
    const timestamp = new Date(lastUsedAt).getTime()
    return Number.isNaN(timestamp) ? 0 : timestamp
  }

  // 🔧 Whether the account can serve `requestedModel`. Accounts without a model list
  // accept every model.
  // NOTE(review): `supportedModels` is read as-is from the account; if it is a string
  // rather than an array, `includes` performs a substring match — confirm the stored shape.
  _supportsModel(account, requestedModel) {
    if (!requestedModel || !account.supportedModels || account.supportedModels.length === 0) {
      return true
    }
    return account.supportedModels.includes(requestedModel)
  }

  // 🔧 Shape an account record into a scheduling candidate.
  _toCandidate(account) {
    return {
      ...account,
      accountId: account.id,
      accountType: 'openai',
      priority: this._parsePriority(account.priority),
      lastUsedAt: account.lastUsedAt || '0'
    }
  }

  // 🔧 Shared screening for pool and group members: token usability, model support and
  // rate limiting. Returns true when the account may be used; logs the reason otherwise.
  async _screenCandidate(account, requestedModel, fromGroup = false) {
    const warnLabel = fromGroup ? 'Group member OpenAI account' : 'OpenAI account'
    const skipLabel = fromGroup ? 'group member OpenAI account' : 'OpenAI account'

    // An expired token is only usable when it can be refreshed
    if (openaiAccountService.isTokenExpired(account) && !account.refreshToken) {
      logger.warn(`⚠️ ${warnLabel} ${account.name} token expired and no refresh token available`)
      return false
    }

    if (!this._supportsModel(account, requestedModel)) {
      logger.debug(
        `⏭️ Skipping ${skipLabel} ${account.name} - doesn't support model ${requestedModel}`
      )
      return false
    }

    if (await this.isAccountRateLimited(account.id)) {
      logger.debug(`⏭️ Skipping ${skipLabel} ${account.name} - rate limited`)
      return false
    }

    return true
  }

  // 🎯 Select an OpenAI account for an API key.
  // Returns { accountId, accountType }; throws when no account is available.
  async selectAccountForApiKey(apiKeyData, sessionHash = null, requestedModel = null) {
    try {
      // A bound dedicated account or group takes precedence
      if (apiKeyData.openaiAccountId) {
        // "group:<id>" binds the key to an account group
        if (apiKeyData.openaiAccountId.startsWith('group:')) {
          const groupId = apiKeyData.openaiAccountId.replace('group:', '')
          logger.info(
            `🎯 API key ${apiKeyData.name} is bound to group ${groupId}, selecting from group`
          )
          return await this.selectAccountFromGroup(groupId, sessionHash, requestedModel)
        }

        // Plain dedicated account
        const boundAccount = await openaiAccountService.getAccount(apiKeyData.openaiAccountId)
        if (boundAccount && boundAccount.isActive === 'true' && boundAccount.status !== 'error') {
          logger.info(
            `🎯 Using bound dedicated OpenAI account: ${boundAccount.name} (${apiKeyData.openaiAccountId}) for API key ${apiKeyData.name}`
          )
          return {
            accountId: apiKeyData.openaiAccountId,
            accountType: 'openai'
          }
        }
        logger.warn(
          `⚠️ Bound OpenAI account ${apiKeyData.openaiAccountId} is not available, falling back to pool`
        )
      }

      // Reuse the sticky-session account while it is still usable
      if (sessionHash) {
        const mappedAccount = await this._getSessionMapping(sessionHash)
        if (mappedAccount) {
          const isAvailable = await this._isAccountAvailable(
            mappedAccount.accountId,
            mappedAccount.accountType
          )
          if (isAvailable) {
            logger.info(
              `🎯 Using sticky session account: ${mappedAccount.accountId} (${mappedAccount.accountType}) for session ${sessionHash}`
            )
            return mappedAccount
          }
          logger.warn(
            `⚠️ Mapped account ${mappedAccount.accountId} is no longer available, selecting new account`
          )
          await this._deleteSessionMapping(sessionHash)
        }
      }

      const availableAccounts = await this._getAllAvailableAccounts(apiKeyData, requestedModel)

      if (availableAccounts.length === 0) {
        // Give a more specific error when a model was requested
        if (requestedModel) {
          throw new Error(
            `No available OpenAI accounts support the requested model: ${requestedModel}`
          )
        }
        throw new Error('No available OpenAI accounts')
      }

      // Best candidate: highest priority, then least recently used
      const selectedAccount = this._sortAccountsByPriority(availableAccounts)[0]

      // Pin the session to the chosen account
      if (sessionHash) {
        await this._setSessionMapping(
          sessionHash,
          selectedAccount.accountId,
          selectedAccount.accountType
        )
        logger.info(
          `🎯 Created new sticky session mapping: ${selectedAccount.name} (${selectedAccount.accountId}, ${selectedAccount.accountType}) for session ${sessionHash}`
        )
      }

      logger.info(
        `🎯 Selected account: ${selectedAccount.name} (${selectedAccount.accountId}, ${selectedAccount.accountType}) with priority ${selectedAccount.priority} for API key ${apiKeyData.name}`
      )

      return {
        accountId: selectedAccount.accountId,
        accountType: selectedAccount.accountType
      }
    } catch (error) {
      logger.error('❌ Failed to select account for API key:', error)
      throw error
    }
  }

  // 📋 Collect every account that may serve the request.
  // A usable bound account is returned alone; otherwise the shared pool is screened.
  async _getAllAvailableAccounts(apiKeyData, requestedModel = null) {
    const availableAccounts = []

    if (apiKeyData.openaiAccountId) {
      const boundAccount = await openaiAccountService.getAccount(apiKeyData.openaiAccountId)
      if (boundAccount && boundAccount.isActive === 'true' && boundAccount.status !== 'error') {
        const isRateLimited = await this.isAccountRateLimited(boundAccount.id)
        if (!isRateLimited) {
          if (!this._supportsModel(boundAccount, requestedModel)) {
            logger.warn(
              `⚠️ Bound OpenAI account ${boundAccount.name} does not support model ${requestedModel}`
            )
            // A bound account that cannot serve the model yields no candidates at all
            return availableAccounts
          }

          logger.info(
            `🎯 Using bound dedicated OpenAI account: ${boundAccount.name} (${apiKeyData.openaiAccountId})`
          )
          return [this._toCandidate(boundAccount)]
        }
        // A rate-limited bound account falls through to the shared pool
      } else {
        logger.warn(`⚠️ Bound OpenAI account ${apiKeyData.openaiAccountId} is not available`)
      }
    }

    // Shared pool
    const openaiAccounts = await openaiAccountService.getAllAccounts()
    for (const account of openaiAccounts) {
      const isPoolMember =
        account.isActive === 'true' &&
        account.status !== 'error' &&
        (account.accountType === 'shared' || !account.accountType) && // tolerate legacy records
        this._isSchedulable(account.schedulable)
      if (!isPoolMember) {
        continue
      }

      if (await this._screenCandidate(account, requestedModel)) {
        availableAccounts.push(this._toCandidate(account))
      }
    }

    return availableAccounts
  }

  // 🔢 Order candidates by priority (lower number first), then by least recently used.
  // Returns a new array; the caller's array is left untouched.
  _sortAccountsByPriority(accounts) {
    return [...accounts].sort((a, b) => {
      if (a.priority !== b.priority) {
        return a.priority - b.priority
      }
      return this._lastUsedTimestamp(a.lastUsedAt) - this._lastUsedTimestamp(b.lastUsedAt)
    })
  }

  // 🔍 Whether an account is active, schedulable and not rate limited.
  // Lookup failures are logged and reported as "not available".
  async _isAccountAvailable(accountId, accountType) {
    try {
      if (accountType === 'openai') {
        const account = await openaiAccountService.getAccount(accountId)
        if (!account || account.isActive !== 'true' || account.status === 'error') {
          return false
        }
        if (!this._isSchedulable(account.schedulable)) {
          logger.info(`🚫 OpenAI account ${accountId} is not schedulable`)
          return false
        }
        return !(await this.isAccountRateLimited(accountId))
      }
      return false
    } catch (error) {
      logger.warn(`⚠️ Failed to check account availability: ${accountId}`, error)
      return false
    }
  }

  // 🔗 Read the sticky-session mapping; returns null when absent or unparsable.
  async _getSessionMapping(sessionHash) {
    const client = redis.getClientSafe()
    const mappingData = await client.get(`${this.SESSION_MAPPING_PREFIX}${sessionHash}`)

    if (mappingData) {
      try {
        return JSON.parse(mappingData)
      } catch (error) {
        logger.warn('⚠️ Failed to parse session mapping:', error)
        return null
      }
    }

    return null
  }

  // 💾 Store the sticky-session mapping with a one-hour expiry.
  async _setSessionMapping(sessionHash, accountId, accountType) {
    const client = redis.getClientSafe()
    const mappingData = JSON.stringify({ accountId, accountType })

    await client.setex(`${this.SESSION_MAPPING_PREFIX}${sessionHash}`, 3600, mappingData)
  }

  // 🗑️ Remove the sticky-session mapping.
  async _deleteSessionMapping(sessionHash) {
    const client = redis.getClientSafe()
    await client.del(`${this.SESSION_MAPPING_PREFIX}${sessionHash}`)
  }

  // 🚫 Mark an account as rate limited and drop the session mapping that pointed at it.
  async markAccountRateLimited(accountId, accountType, sessionHash = null) {
    try {
      if (accountType === 'openai') {
        await openaiAccountService.setAccountRateLimited(accountId, true)
      }

      if (sessionHash) {
        await this._deleteSessionMapping(sessionHash)
      }

      return { success: true }
    } catch (error) {
      logger.error(
        `❌ Failed to mark account as rate limited: ${accountId} (${accountType})`,
        error
      )
      throw error
    }
  }

  // ✅ Clear an account's rate-limit mark.
  async removeAccountRateLimit(accountId, accountType) {
    try {
      if (accountType === 'openai') {
        await openaiAccountService.setAccountRateLimited(accountId, false)
      }

      return { success: true }
    } catch (error) {
      logger.error(
        `❌ Failed to remove rate limit for account: ${accountId} (${accountType})`,
        error
      )
      throw error
    }
  }

  // 🔍 Whether an account is inside its one-hour rate-limit window.
  // Unknown accounts and lookup failures are reported as "not limited".
  async isAccountRateLimited(accountId) {
    try {
      const account = await openaiAccountService.getAccount(accountId)
      if (!account) {
        return false
      }

      if (account.rateLimitStatus === 'limited' && account.rateLimitedAt) {
        const limitedAt = new Date(account.rateLimitedAt).getTime()
        const limitDuration = 60 * 60 * 1000 // 1 hour

        return Date.now() < limitedAt + limitDuration
      }
      return false
    } catch (error) {
      logger.error(`❌ Failed to check rate limit status: ${accountId}`, error)
      return false
    }
  }

  // 👥 Select an account from a group.
  // Returns { accountId, accountType }; throws when the group is missing, is not an
  // OpenAI group, has no members, or none of its members is usable.
  async selectAccountFromGroup(groupId, sessionHash = null, requestedModel = null) {
    try {
      const group = await accountGroupService.getGroup(groupId)
      if (!group) {
        throw new Error(`Group ${groupId} not found`)
      }

      if (group.platform !== 'openai') {
        throw new Error(`Group ${group.name} is not an OpenAI group`)
      }

      logger.info(`👥 Selecting account from OpenAI group: ${group.name}`)

      // Reuse the sticky-session account if it is still in the group and usable
      if (sessionHash) {
        const mappedAccount = await this._getSessionMapping(sessionHash)
        if (mappedAccount) {
          const isInGroup = await this._isAccountInGroup(mappedAccount.accountId, groupId)
          if (isInGroup) {
            const isAvailable = await this._isAccountAvailable(
              mappedAccount.accountId,
              mappedAccount.accountType
            )
            if (isAvailable) {
              logger.info(
                `🎯 Using sticky session account from group: ${mappedAccount.accountId} (${mappedAccount.accountType})`
              )
              return mappedAccount
            }
          }
          // Unusable or no longer a member: drop the mapping
          await this._deleteSessionMapping(sessionHash)
        }
      }

      const memberIds = await accountGroupService.getGroupMembers(groupId)
      if (memberIds.length === 0) {
        throw new Error(`Group ${group.name} has no members`)
      }

      // Screen each member
      const availableAccounts = []
      for (const memberId of memberIds) {
        const account = await openaiAccountService.getAccount(memberId)
        const isEligible =
          account &&
          account.isActive === 'true' &&
          account.status !== 'error' &&
          this._isSchedulable(account.schedulable)
        if (!isEligible) {
          continue
        }

        if (await this._screenCandidate(account, requestedModel, true)) {
          availableAccounts.push(this._toCandidate(account))
        }
      }

      if (availableAccounts.length === 0) {
        throw new Error(`No available accounts in group ${group.name}`)
      }

      // Best candidate: highest priority, then least recently used
      const selectedAccount = this._sortAccountsByPriority(availableAccounts)[0]

      // Pin the session to the chosen account
      if (sessionHash) {
        await this._setSessionMapping(
          sessionHash,
          selectedAccount.accountId,
          selectedAccount.accountType
        )
        logger.info(
          `🎯 Created new sticky session mapping from group: ${selectedAccount.name} (${selectedAccount.accountId})`
        )
      }

      logger.info(
        `🎯 Selected account from group: ${selectedAccount.name} (${selectedAccount.accountId}) with priority ${selectedAccount.priority}`
      )

      return {
        accountId: selectedAccount.accountId,
        accountType: selectedAccount.accountType
      }
    } catch (error) {
      logger.error(`❌ Failed to select account from group ${groupId}:`, error)
      throw error
    }
  }

  // 🔍 Whether an account belongs to a group.
  async _isAccountInGroup(accountId, groupId) {
    const members = await accountGroupService.getGroupMembers(groupId)
    return members.includes(accountId)
  }

  // 📊 Stamp the account's last-used time. Failures are logged, never thrown.
  async updateAccountLastUsed(accountId, accountType) {
    try {
      if (accountType === 'openai') {
        await openaiAccountService.updateAccount(accountId, {
          lastUsedAt: new Date().toISOString()
        })
      }
    } catch (error) {
      logger.warn(`⚠️ Failed to update last used time for account ${accountId}:`, error)
    }
  }
}
(pricingData.cache_creation_input_token_cost || 0) * 1000000 + + // 检测是否为 OpenAI 模型(通过模型名或 litellm_provider) + const isOpenAIModel = + model.includes('gpt') || model.includes('o1') || pricingData.litellm_provider === 'openai' + + if (isOpenAIModel && !pricingData.cache_creation_input_token_cost && cacheCreateTokens > 0) { + // OpenAI 模型:缓存创建按普通 input 价格计费 + cacheWritePrice = inputPrice + } + pricing = { - input: (pricingData.input_cost_per_token || 0) * 1000000, // 转换为per 1M tokens - output: (pricingData.output_cost_per_token || 0) * 1000000, - cacheWrite: (pricingData.cache_creation_input_token_cost || 0) * 1000000, - cacheRead: (pricingData.cache_read_input_token_cost || 0) * 1000000 + input: inputPrice, + output: outputPrice, + cacheWrite: cacheWritePrice, + cacheRead: cacheReadPrice } usingDynamicPricing = true } else { @@ -126,6 +144,13 @@ class CostCalculator { cacheWrite: this.formatCost(cacheWriteCost), cacheRead: this.formatCost(cacheReadCost), total: this.formatCost(totalCost) + }, + // 添加调试信息 + debug: { + isOpenAIModel: model.includes('gpt') || model.includes('o1'), + hasCacheCreatePrice: !!pricingData?.cache_creation_input_token_cost, + cacheCreateTokens, + cacheWritePriceUsed: pricing.cacheWrite } } } diff --git a/web/admin-spa/src/components/accounts/AccountForm.vue b/web/admin-spa/src/components/accounts/AccountForm.vue index 52146ceb..f16c244a 100644 --- a/web/admin-spa/src/components/accounts/AccountForm.vue +++ b/web/admin-spa/src/components/accounts/AccountForm.vue @@ -606,6 +606,25 @@ + +
+ +